xref: /csrg-svn/sys/vm/swap_pager.c (revision 64827)
1 /*
2  * Copyright (c) 1990 University of Utah.
3  * Copyright (c) 1991, 1993
4  *	The Regents of the University of California.  All rights reserved.
5  *
6  * This code is derived from software contributed to Berkeley by
7  * the Systems Programming Group of the University of Utah Computer
8  * Science Department.
9  *
10  * %sccs.include.redist.c%
11  *
12  * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$
13  *
14  *	@(#)swap_pager.c	8.3 (Berkeley) 11/10/93
15  */
16 
17 /*
18  * Quick hack to page to dedicated partition(s).
19  * TODO:
20  *	Add multiprocessor locks
21  *	Deal with async writes in a better fashion
22  */
23 
24 #include <sys/param.h>
25 #include <sys/systm.h>
26 #include <sys/proc.h>
27 #include <sys/buf.h>
28 #include <sys/map.h>
29 #include <sys/vnode.h>
30 #include <sys/malloc.h>
31 
32 #include <miscfs/specfs/specdev.h>
33 
34 #include <vm/vm.h>
35 #include <vm/vm_page.h>
36 #include <vm/vm_pageout.h>
37 #include <vm/swap_pager.h>
38 
#define NSWSIZES	16	/* size of swtab */
#define NPENDINGIO	64	/* max # of pending cleans */
#define MAXDADDRS	64	/* max # of disk addrs for fixed allocations */

#ifdef DEBUG
/*
 * Debug trace mask; bits below select printf categories.
 * Default enables SDB_ANOM so anomalies are reported even
 * when general tracing is off.
 */
int	swpagerdebug = 0x100;
#define	SDB_FOLLOW	0x001	/* trace routine entry */
#define SDB_INIT	0x002	/* trace initialization */
#define SDB_ALLOC	0x004	/* trace pager alloc/dealloc */
#define SDB_IO		0x008	/* trace IO start/finish */
#define SDB_WRITE	0x010	/* trace pageout (write) path */
#define SDB_FAIL	0x020	/* report failure returns */
#define SDB_ALLOCBLK	0x040	/* trace swap block allocation */
#define SDB_FULL	0x080	/* extra-verbose reporting */
#define SDB_ANOM	0x100	/* report anomalous conditions */
#define SDB_ANOMPANIC	0x200	/* return early from pager ops if panicking */
#endif
56 
/*
 * One in-flight asynchronous pageout ("clean") operation.  A fixed
 * pool of these (swcleanlist) is initialized by swap_pager_init();
 * entries migrate between the swap_pager_free and swap_pager_inuse
 * queues as async writes are started and reaped.
 */
struct swpagerclean {
	queue_head_t		spc_list;	/* links on free/inuse queue */
	int			spc_flags;	/* SPC_* state bits below */
	struct buf		*spc_bp;	/* buf driving the IO */
	sw_pager_t		spc_swp;	/* pager the page belongs to */
	vm_offset_t		spc_kva;	/* KVA the page is mapped at */
	vm_page_t		spc_m;		/* the page being cleaned */
} swcleanlist[NPENDINGIO];
typedef struct swpagerclean *swp_clean_t;


/* spc_flags values */
#define SPC_FREE	0x00	/* on free list */
#define SPC_BUSY	0x01	/* IO in progress */
#define SPC_DONE	0x02	/* IO complete (set by swap_pager_iodone) */
#define SPC_ERROR	0x04	/* IO completed with error */
#define SPC_DIRTY	0x08	/* DEBUG: page re-dirtied while cleaning */
74 
/*
 * Table mapping object size to swap allocation block size, built by
 * swap_pager_init().  Entries are ordered by increasing st_osize;
 * the terminating entry has st_osize == 0 and catches all larger
 * objects.
 */
struct swtab {
	vm_size_t st_osize;	/* size of object (bytes) */
	int	  st_bsize;	/* vs. size of swap block (DEV_BSIZE units) */
#ifdef DEBUG
	u_long	  st_inuse;	/* number in this range in use */
	u_long	  st_usecnt;	/* total used of this size */
#endif
} swtab[NSWSIZES+1];
83 
#ifdef DEBUG
int		swap_pager_pendingio;	/* max pending async "clean" ops */
int		swap_pager_poip;	/* pageouts in progress */
int		swap_pager_piip;	/* pageins in progress */
#endif

/* global state shared by all swap pagers; modified at splbio */
queue_head_t	swap_pager_inuse;	/* list of pending page cleans */
queue_head_t	swap_pager_free;	/* list of free pager clean structs */
queue_head_t	swap_pager_list;	/* list of "named" anon regions */
93 
static int		swap_pager_finish __P((swp_clean_t));
static void 		swap_pager_init __P((void));
static vm_pager_t	swap_pager_alloc
			    __P((caddr_t, vm_size_t, vm_prot_t, vm_offset_t));
static boolean_t	swap_pager_clean __P((vm_page_t, int));
static void		swap_pager_dealloc __P((vm_pager_t));
static int		swap_pager_getpage
			    __P((vm_pager_t, vm_page_t, boolean_t));
static boolean_t	swap_pager_haspage __P((vm_pager_t, vm_offset_t));
static int		swap_pager_io __P((sw_pager_t, vm_page_t, int));
static void		swap_pager_iodone __P((struct buf *));
static int		swap_pager_putpage
			    __P((vm_pager_t, vm_page_t, boolean_t));

/*
 * Pager operations vector for the swap pager; installed as the
 * default pager (dfltpagerops) by swap_pager_init().
 */
struct pagerops swappagerops = {
	swap_pager_init,
	swap_pager_alloc,
	swap_pager_dealloc,
	swap_pager_getpage,
	swap_pager_putpage,
	swap_pager_haspage
};
116 
117 static void
118 swap_pager_init()
119 {
120 	register swp_clean_t spc;
121 	register int i, bsize;
122 	extern int dmmin, dmmax;
123 	int maxbsize;
124 
125 #ifdef DEBUG
126 	if (swpagerdebug & (SDB_FOLLOW|SDB_INIT))
127 		printf("swpg_init()\n");
128 #endif
129 	dfltpagerops = &swappagerops;
130 	queue_init(&swap_pager_list);
131 
132 	/*
133 	 * Initialize clean lists
134 	 */
135 	queue_init(&swap_pager_inuse);
136 	queue_init(&swap_pager_free);
137 	for (i = 0, spc = swcleanlist; i < NPENDINGIO; i++, spc++) {
138 		queue_enter(&swap_pager_free, spc, swp_clean_t, spc_list);
139 		spc->spc_flags = SPC_FREE;
140 	}
141 
142 	/*
143 	 * Calculate the swap allocation constants.
144 	 */
145         if (dmmin == 0) {
146                 dmmin = DMMIN;
147 		if (dmmin < CLBYTES/DEV_BSIZE)
148 			dmmin = CLBYTES/DEV_BSIZE;
149 	}
150         if (dmmax == 0)
151                 dmmax = DMMAX;
152 
153 	/*
154 	 * Fill in our table of object size vs. allocation size
155 	 */
156 	bsize = btodb(PAGE_SIZE);
157 	if (bsize < dmmin)
158 		bsize = dmmin;
159 	maxbsize = btodb(sizeof(sw_bm_t) * NBBY * PAGE_SIZE);
160 	if (maxbsize > dmmax)
161 		maxbsize = dmmax;
162 	for (i = 0; i < NSWSIZES; i++) {
163 		swtab[i].st_osize = (vm_size_t) (MAXDADDRS * dbtob(bsize));
164 		swtab[i].st_bsize = bsize;
165 #ifdef DEBUG
166 		if (swpagerdebug & SDB_INIT)
167 			printf("swpg_init: ix %d, size %x, bsize %x\n",
168 			       i, swtab[i].st_osize, swtab[i].st_bsize);
169 #endif
170 		if (bsize >= maxbsize)
171 			break;
172 		bsize *= 2;
173 	}
174 	swtab[i].st_osize = 0;
175 	swtab[i].st_bsize = bsize;
176 }
177 
178 /*
179  * Allocate a pager structure and associated resources.
180  * Note that if we are called from the pageout daemon (handle == NULL)
181  * we should not wait for memory as it could resulting in deadlock.
182  */
183 static vm_pager_t
184 swap_pager_alloc(handle, size, prot, foff)
185 	caddr_t handle;
186 	register vm_size_t size;
187 	vm_prot_t prot;
188 	vm_offset_t foff;
189 {
190 	register vm_pager_t pager;
191 	register sw_pager_t swp;
192 	struct swtab *swt;
193 	int waitok;
194 
195 #ifdef DEBUG
196 	if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOC))
197 		printf("swpg_alloc(%x, %x, %x)\n", handle, size, prot);
198 #endif
199 	/*
200 	 * If this is a "named" anonymous region, look it up and
201 	 * return the appropriate pager if it exists.
202 	 */
203 	if (handle) {
204 		pager = vm_pager_lookup(&swap_pager_list, handle);
205 		if (pager != NULL) {
206 			/*
207 			 * Use vm_object_lookup to gain a reference
208 			 * to the object and also to remove from the
209 			 * object cache.
210 			 */
211 			if (vm_object_lookup(pager) == NULL)
212 				panic("swap_pager_alloc: bad object");
213 			return(pager);
214 		}
215 	}
216 	/*
217 	 * Pager doesn't exist, allocate swap management resources
218 	 * and initialize.
219 	 */
220 	waitok = handle ? M_WAITOK : M_NOWAIT;
221 	pager = (vm_pager_t)malloc(sizeof *pager, M_VMPAGER, waitok);
222 	if (pager == NULL)
223 		return(NULL);
224 	swp = (sw_pager_t)malloc(sizeof *swp, M_VMPGDATA, waitok);
225 	if (swp == NULL) {
226 #ifdef DEBUG
227 		if (swpagerdebug & SDB_FAIL)
228 			printf("swpg_alloc: swpager malloc failed\n");
229 #endif
230 		free((caddr_t)pager, M_VMPAGER);
231 		return(NULL);
232 	}
233 	size = round_page(size);
234 	for (swt = swtab; swt->st_osize; swt++)
235 		if (size <= swt->st_osize)
236 			break;
237 #ifdef DEBUG
238 	swt->st_inuse++;
239 	swt->st_usecnt++;
240 #endif
241 	swp->sw_osize = size;
242 	swp->sw_bsize = swt->st_bsize;
243 	swp->sw_nblocks = (btodb(size) + swp->sw_bsize - 1) / swp->sw_bsize;
244 	swp->sw_blocks = (sw_blk_t)
245 		malloc(swp->sw_nblocks*sizeof(*swp->sw_blocks),
246 		       M_VMPGDATA, M_NOWAIT);
247 	if (swp->sw_blocks == NULL) {
248 		free((caddr_t)swp, M_VMPGDATA);
249 		free((caddr_t)pager, M_VMPAGER);
250 #ifdef DEBUG
251 		if (swpagerdebug & SDB_FAIL)
252 			printf("swpg_alloc: sw_blocks malloc failed\n");
253 		swt->st_inuse--;
254 		swt->st_usecnt--;
255 #endif
256 		return(FALSE);
257 	}
258 	bzero((caddr_t)swp->sw_blocks,
259 	      swp->sw_nblocks * sizeof(*swp->sw_blocks));
260 	swp->sw_poip = 0;
261 	if (handle) {
262 		vm_object_t object;
263 
264 		swp->sw_flags = SW_NAMED;
265 		queue_enter(&swap_pager_list, pager, vm_pager_t, pg_list);
266 		/*
267 		 * Consistant with other pagers: return with object
268 		 * referenced.  Can't do this with handle == NULL
269 		 * since it might be the pageout daemon calling.
270 		 */
271 		object = vm_object_allocate(size);
272 		vm_object_enter(object, pager);
273 		vm_object_setpager(object, pager, 0, FALSE);
274 	} else {
275 		swp->sw_flags = 0;
276 		queue_init(&pager->pg_list);
277 	}
278 	pager->pg_handle = handle;
279 	pager->pg_ops = &swappagerops;
280 	pager->pg_type = PG_SWAP;
281 	pager->pg_data = (caddr_t)swp;
282 
283 #ifdef DEBUG
284 	if (swpagerdebug & SDB_ALLOC)
285 		printf("swpg_alloc: pg_data %x, %x of %x at %x\n",
286 		       swp, swp->sw_nblocks, swp->sw_bsize, swp->sw_blocks);
287 #endif
288 	return(pager);
289 }
290 
/*
 * Tear down a pager: wait for all pending pageouts to drain, return
 * any swap space still held by the object to the swap map, and free
 * the pager's management structures.
 */
static void
swap_pager_dealloc(pager)
	vm_pager_t pager;
{
	register int i;
	register sw_blk_t bp;
	register sw_pager_t swp;
	struct swtab *swt;
	int s;

#ifdef DEBUG
	/* save panic time state */
	if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
		return;
	if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOC))
		printf("swpg_dealloc(%x)\n", pager);
#endif
	/*
	 * Remove from list right away so lookups will fail if we
	 * block for pageout completion.
	 */
	swp = (sw_pager_t) pager->pg_data;
	if (swp->sw_flags & SW_NAMED) {
		queue_remove(&swap_pager_list, pager, vm_pager_t, pg_list);
		swp->sw_flags &= ~SW_NAMED;
	}
#ifdef DEBUG
	/* find the size class this object was accounted under */
	for (swt = swtab; swt->st_osize; swt++)
		if (swp->sw_osize <= swt->st_osize)
			break;
	swt->st_inuse--;
#endif

	/*
	 * Wait for all pageouts to finish and remove
	 * all entries from cleaning list.
	 * (swap_pager_iodone wakes us when sw_poip drains.)
	 */
	s = splbio();
	while (swp->sw_poip) {
		swp->sw_flags |= SW_WANTED;
		assert_wait((int)swp, 0);
		thread_block();
	}
	splx(s);
	(void) swap_pager_clean(NULL, B_WRITE);

	/*
	 * Free left over swap blocks
	 */
	for (i = 0, bp = swp->sw_blocks; i < swp->sw_nblocks; i++, bp++)
		if (bp->swb_block) {
#ifdef DEBUG
			if (swpagerdebug & (SDB_ALLOCBLK|SDB_FULL))
				printf("swpg_dealloc: blk %x\n",
				       bp->swb_block);
#endif
			rmfree(swapmap, swp->sw_bsize, bp->swb_block);
		}
	/*
	 * Free swap management resources
	 */
	free((caddr_t)swp->sw_blocks, M_VMPGDATA);
	free((caddr_t)swp, M_VMPGDATA);
	free((caddr_t)pager, M_VMPAGER);
}
356 
357 static int
358 swap_pager_getpage(pager, m, sync)
359 	vm_pager_t pager;
360 	vm_page_t m;
361 	boolean_t sync;
362 {
363 #ifdef DEBUG
364 	if (swpagerdebug & SDB_FOLLOW)
365 		printf("swpg_getpage(%x, %x, %d)\n", pager, m, sync);
366 #endif
367 	return(swap_pager_io((sw_pager_t)pager->pg_data, m, B_READ));
368 }
369 
370 static int
371 swap_pager_putpage(pager, m, sync)
372 	vm_pager_t pager;
373 	vm_page_t m;
374 	boolean_t sync;
375 {
376 	int flags;
377 
378 #ifdef DEBUG
379 	if (swpagerdebug & SDB_FOLLOW)
380 		printf("swpg_putpage(%x, %x, %d)\n", pager, m, sync);
381 #endif
382 	if (pager == NULL) {
383 		(void) swap_pager_clean(NULL, B_WRITE);
384 		return (VM_PAGER_OK);		/* ??? */
385 	}
386 	flags = B_WRITE;
387 	if (!sync)
388 		flags |= B_ASYNC;
389 	return(swap_pager_io((sw_pager_t)pager->pg_data, m, flags));
390 }
391 
392 static boolean_t
393 swap_pager_haspage(pager, offset)
394 	vm_pager_t pager;
395 	vm_offset_t offset;
396 {
397 	register sw_pager_t swp;
398 	register sw_blk_t swb;
399 	int ix;
400 
401 #ifdef DEBUG
402 	if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOCBLK))
403 		printf("swpg_haspage(%x, %x) ", pager, offset);
404 #endif
405 	swp = (sw_pager_t) pager->pg_data;
406 	ix = offset / dbtob(swp->sw_bsize);
407 	if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
408 #ifdef DEBUG
409 		if (swpagerdebug & (SDB_FAIL|SDB_FOLLOW|SDB_ALLOCBLK))
410 			printf("swpg_haspage: %x bad offset %x, ix %x\n",
411 			       swp->sw_blocks, offset, ix);
412 #endif
413 		return(FALSE);
414 	}
415 	swb = &swp->sw_blocks[ix];
416 	if (swb->swb_block)
417 		ix = atop(offset % dbtob(swp->sw_bsize));
418 #ifdef DEBUG
419 	if (swpagerdebug & SDB_ALLOCBLK)
420 		printf("%x blk %x+%x ", swp->sw_blocks, swb->swb_block, ix);
421 	if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOCBLK))
422 		printf("-> %c\n",
423 		       "FT"[swb->swb_block && (swb->swb_mask & (1 << ix))]);
424 #endif
425 	if (swb->swb_block && (swb->swb_mask & (1 << ix)))
426 		return(TRUE);
427 	return(FALSE);
428 }
429 
/*
 * Scaled down version of swap().
 * Assumes that PAGE_SIZE < MAXPHYS; i.e. only one operation needed.
 * BOGUS:  lower level IO routines expect a KVA so we have to map our
 * provided physical page into the KVA to keep them happy.
 *
 * Returns VM_PAGER_OK, VM_PAGER_PEND (async write started),
 * VM_PAGER_FAIL (bad offset, no swap space, or no clean structs
 * available) or VM_PAGER_ERROR (IO error).
 */
static int
swap_pager_io(swp, m, flags)
	register sw_pager_t swp;
	vm_page_t m;
	int flags;	/* B_READ, or B_WRITE optionally with B_ASYNC */
{
	register struct buf *bp;
	register sw_blk_t swb;
	register int s;
	int ix;
	boolean_t rv;
	vm_offset_t kva, off;
	swp_clean_t spc;

#ifdef DEBUG
	/* save panic time state */
	if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
		return (VM_PAGER_FAIL);		/* XXX: correct return? */
	if (swpagerdebug & (SDB_FOLLOW|SDB_IO))
		printf("swpg_io(%x, %x, %x)\n", swp, m, flags);
#endif

	/*
	 * For reads (pageins) and synchronous writes, we clean up
	 * all completed async pageouts.
	 */
	if ((flags & B_ASYNC) == 0) {
		s = splbio();
#ifdef DEBUG
		/*
		 * Check to see if this page is currently being cleaned.
		 * If it is, we just wait til the operation is done before
		 * continuing.  (Non-DEBUG kernels just reap once and
		 * press on.)
		 */
		while (swap_pager_clean(m, flags&B_READ)) {
			if (swpagerdebug & SDB_ANOM)
				printf("swap_pager_io: page %x cleaning\n", m);

			swp->sw_flags |= SW_WANTED;
			assert_wait((int)swp, 0);
			thread_block();
		}
#else
		(void) swap_pager_clean(m, flags&B_READ);
#endif
		splx(s);
	}
	/*
	 * For async writes (pageouts), we cleanup completed pageouts so
	 * that all available resources are freed.  Also tells us if this
	 * page is already being cleaned.  If it is, or no resources
	 * are available, we try again later.
	 */
	else if (swap_pager_clean(m, B_WRITE) ||
		 queue_empty(&swap_pager_free)) {
#ifdef DEBUG
		if ((swpagerdebug & SDB_ANOM) &&
		    !queue_empty(&swap_pager_free))
			printf("swap_pager_io: page %x already cleaning\n", m);
#endif
		return(VM_PAGER_FAIL);
	}

	/*
	 * Determine swap block and allocate as necessary.
	 * Each sw_blk covers dbtob(sw_bsize) bytes of the object.
	 */
	off = m->offset + m->object->paging_offset;
	ix = off / dbtob(swp->sw_bsize);
	if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
#ifdef DEBUG
		if (swpagerdebug & SDB_FAIL)
			printf("swpg_io: bad offset %x+%x(%d) in %x\n",
			       m->offset, m->object->paging_offset,
			       ix, swp->sw_blocks);
#endif
		return(VM_PAGER_FAIL);
	}
	swb = &swp->sw_blocks[ix];
	off = off % dbtob(swp->sw_bsize);
	if (flags & B_READ) {
		/*
		 * Pagein: the block must exist and this page's bit in
		 * its written-page mask must be set, otherwise there
		 * is nothing on swap to read back.
		 */
		if (swb->swb_block == 0 ||
		    (swb->swb_mask & (1 << atop(off))) == 0) {
#ifdef DEBUG
			if (swpagerdebug & (SDB_ALLOCBLK|SDB_FAIL))
				printf("swpg_io: %x bad read: blk %x+%x, mask %x, off %x+%x\n",
				       swp->sw_blocks,
				       swb->swb_block, atop(off),
				       swb->swb_mask,
				       m->offset, m->object->paging_offset);
#endif
			/* XXX: should we zero page here?? */
			return(VM_PAGER_FAIL);
		}
	} else if (swb->swb_block == 0) {
		/* pageout to an unallocated block: carve out swap space */
		swb->swb_block = rmalloc(swapmap, swp->sw_bsize);
		if (swb->swb_block == 0) {
#ifdef DEBUG
			if (swpagerdebug & SDB_FAIL)
				printf("swpg_io: rmalloc of %x failed\n",
				       swp->sw_bsize);
#endif
			return(VM_PAGER_FAIL);
		}
#ifdef DEBUG
		if (swpagerdebug & (SDB_FULL|SDB_ALLOCBLK))
			printf("swpg_io: %x alloc blk %x at ix %x\n",
			       swp->sw_blocks, swb->swb_block, ix);
#endif
	}

	/*
	 * Allocate a kernel virtual address and initialize so that PTE
	 * is available for lower level IO drivers.
	 */
	kva = vm_pager_map_page(m);

	/*
	 * Get a swap buffer header and perform the IO.
	 * Swap bufs live on the bswlist free list; sleep until one
	 * is available.
	 */
	s = splbio();
	while (bswlist.b_actf == NULL) {
#ifdef DEBUG
		if (swpagerdebug & SDB_ANOM)
			printf("swap_pager_io: wait on swbuf for %x (%d)\n",
			       m, flags);
#endif
		bswlist.b_flags |= B_WANTED;
		sleep((caddr_t)&bswlist, PSWP+1);
	}
	bp = bswlist.b_actf;
	bswlist.b_actf = bp->b_actf;
	splx(s);
	bp->b_flags = B_BUSY | (flags & B_READ);
	bp->b_proc = &proc0;	/* XXX (but without B_PHYS set this is ok) */
	bp->b_data = (caddr_t)kva;
	bp->b_blkno = swb->swb_block + btodb(off);
	VHOLD(swapdev_vp);
	bp->b_vp = swapdev_vp;
	if (swapdev_vp->v_type == VBLK)
		bp->b_dev = swapdev_vp->v_rdev;
	bp->b_bcount = PAGE_SIZE;
	if ((bp->b_flags & B_READ) == 0) {
		bp->b_dirtyoff = 0;
		bp->b_dirtyend = PAGE_SIZE;
		swapdev_vp->v_numoutput++;
	}

	/*
	 * If this is an async write we set up additional buffer fields
	 * and place a "cleaning" entry on the inuse queue.
	 */
	if ((flags & (B_READ|B_ASYNC)) == B_ASYNC) {
#ifdef DEBUG
		if (queue_empty(&swap_pager_free))
			panic("swpg_io: lost spc");
#endif
		queue_remove_first(&swap_pager_free,
				   spc, swp_clean_t, spc_list);
#ifdef DEBUG
		if (spc->spc_flags != SPC_FREE)
			panic("swpg_io: bad free spc");
#endif
		spc->spc_flags = SPC_BUSY;
		spc->spc_bp = bp;
		spc->spc_swp = swp;
		spc->spc_kva = kva;
		spc->spc_m = m;
		/* swap_pager_iodone() will mark the spc DONE at biodone */
		bp->b_flags |= B_CALL;
		bp->b_iodone = swap_pager_iodone;
		s = splbio();
		swp->sw_poip++;
		queue_enter(&swap_pager_inuse, spc, swp_clean_t, spc_list);

#ifdef DEBUG
		swap_pager_poip++;
		if (swpagerdebug & SDB_WRITE)
			printf("swpg_io: write: bp=%x swp=%x spc=%x poip=%d\n",
			       bp, swp, spc, swp->sw_poip);
		if ((swpagerdebug & SDB_ALLOCBLK) &&
		    (swb->swb_mask & (1 << atop(off))) == 0)
			printf("swpg_io: %x write blk %x+%x\n",
			       swp->sw_blocks, swb->swb_block, atop(off));
#endif
		/* record that this page now has a copy on swap */
		swb->swb_mask |= (1 << atop(off));
		splx(s);
	}
#ifdef DEBUG
	if (swpagerdebug & SDB_IO)
		printf("swpg_io: IO start: bp %x, db %x, va %x, pa %x\n",
		       bp, swb->swb_block+btodb(off), kva, VM_PAGE_TO_PHYS(m));
#endif
	VOP_STRATEGY(bp);
	if ((flags & (B_READ|B_ASYNC)) == B_ASYNC) {
		/* async: caller learns the outcome via swap_pager_clean() */
#ifdef DEBUG
		if (swpagerdebug & SDB_IO)
			printf("swpg_io:  IO started: bp %x\n", bp);
#endif
		return(VM_PAGER_PEND);
	}
	/*
	 * Synchronous: wait for the IO to complete, then release the
	 * buf back to the swap buffer pool ourselves.
	 */
	s = splbio();
#ifdef DEBUG
	if (flags & B_READ)
		swap_pager_piip++;
	else
		swap_pager_poip++;
#endif
	while ((bp->b_flags & B_DONE) == 0) {
		assert_wait((int)bp, 0);
		thread_block();
	}
#ifdef DEBUG
	if (flags & B_READ)
		--swap_pager_piip;
	else
		--swap_pager_poip;
#endif
	rv = (bp->b_flags & B_ERROR) ? VM_PAGER_ERROR : VM_PAGER_OK;
	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY);
	bp->b_actf = bswlist.b_actf;
	bswlist.b_actf = bp;
	if (bp->b_vp)
		brelvp(bp);
	if (bswlist.b_flags & B_WANTED) {
		bswlist.b_flags &= ~B_WANTED;
		thread_wakeup((int)&bswlist);
	}
	if ((flags & B_READ) == 0 && rv == VM_PAGER_OK) {
		/* successful pageout: page now matches its swap copy */
		m->flags |= PG_CLEAN;
		pmap_clear_modify(VM_PAGE_TO_PHYS(m));
	}
	splx(s);
#ifdef DEBUG
	if (swpagerdebug & SDB_IO)
		printf("swpg_io:  IO done: bp %x, rv %d\n", bp, rv);
	if ((swpagerdebug & SDB_FAIL) && rv == VM_PAGER_ERROR)
		printf("swpg_io: IO error\n");
#endif
	vm_pager_unmap_page(kva);
	return(rv);
}
676 
/*
 * Reap completed asynchronous pageouts: for each DONE entry on the
 * inuse list, finish the operation (swap_pager_finish) and return
 * its clean structure to the free list.
 *
 * If `m' is non-NULL, returns TRUE when that page was found on the
 * inuse list (i.e. a clean of it is still in flight); `rw' tells us
 * whether the caller is about to read (B_READ) or write the page.
 */
static boolean_t
swap_pager_clean(m, rw)
	vm_page_t m;
	int rw;
{
	register swp_clean_t spc, tspc;
	register int s;

#ifdef DEBUG
	/* save panic time state */
	if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
		return (FALSE);			/* ??? */
	if (swpagerdebug & SDB_FOLLOW)
		printf("swpg_clean(%x, %d)\n", m, rw);
#endif
	tspc = NULL;
	for (;;) {
		/*
		 * Look up and removal from inuse list must be done
		 * at splbio() to avoid conflicts with swap_pager_iodone.
		 */
		s = splbio();
		spc = (swp_clean_t) queue_first(&swap_pager_inuse);
		while (!queue_end(&swap_pager_inuse, (queue_entry_t)spc)) {
			if ((spc->spc_flags & SPC_DONE) &&
			    swap_pager_finish(spc)) {
				queue_remove(&swap_pager_inuse, spc,
					     swp_clean_t, spc_list);
				break;
			}
			if (m && m == spc->spc_m) {
#ifdef DEBUG
				if (swpagerdebug & SDB_ANOM)
					printf("swap_pager_clean: page %x on list, flags %x\n",
					       m, spc->spc_flags);
#endif
				tspc = spc;
			}
			spc = (swp_clean_t) queue_next(&spc->spc_list);
		}

		/*
		 * No operations done, that's all we can do for now.
		 */
		if (queue_end(&swap_pager_inuse, (queue_entry_t)spc))
			break;
		splx(s);

		/*
		 * The desired page was found to be busy earlier in
		 * the scan but has since completed.
		 */
		if (tspc && tspc == spc) {
#ifdef DEBUG
			if (swpagerdebug & SDB_ANOM)
				printf("swap_pager_clean: page %x done while looking\n",
				       m);
#endif
			tspc = NULL;
		}
		/* recycle the clean structure and its page mapping */
		spc->spc_flags = SPC_FREE;
		vm_pager_unmap_page(spc->spc_kva);
		queue_enter(&swap_pager_free, spc, swp_clean_t, spc_list);
#ifdef DEBUG
		if (swpagerdebug & SDB_WRITE)
			printf("swpg_clean: free spc %x\n", spc);
#endif
	}
#ifdef DEBUG
	/*
	 * If we found that the desired page is already being cleaned
	 * mark it so that swap_pager_iodone() will not set the clean
	 * flag before the pageout daemon has another chance to clean it.
	 */
	if (tspc && rw == B_WRITE) {
		if (swpagerdebug & SDB_ANOM)
			printf("swap_pager_clean: page %x on clean list\n",
			       tspc);
		tspc->spc_flags |= SPC_DIRTY;
	}
#endif
	splx(s);

#ifdef DEBUG
	if (swpagerdebug & SDB_WRITE)
		printf("swpg_clean: return %d\n", tspc ? TRUE : FALSE);
	if ((swpagerdebug & SDB_ANOM) && tspc)
		printf("swpg_clean: %s of cleaning page %x\n",
		       rw == B_READ ? "get" : "put", m);
#endif
	return(tspc ? TRUE : FALSE);
}
769 
/*
 * Complete one asynchronous pageout: decrement the object's
 * paging-in-progress count and set the page's flags according to
 * the outcome of the IO.  Returns 0 when the object lock cannot be
 * acquired (the caller leaves the entry on the inuse list and
 * retries later), 1 on completion.
 */
static int
swap_pager_finish(spc)
	register swp_clean_t spc;
{
	vm_object_t object = spc->spc_m->object;

	/*
	 * Mark the paging operation as done.
	 * (XXX) If we cannot get the lock, leave it til later.
	 * (XXX) Also we are assuming that an async write is a
	 *       pageout operation that has incremented the counter.
	 */
	if (!vm_object_lock_try(object))
		return(0);

	if (--object->paging_in_progress == 0)
		thread_wakeup((int) object);

#ifdef DEBUG
	/*
	 * XXX: this isn't even close to the right thing to do,
	 * introduces a variety of race conditions.
	 *
	 * If dirty, vm_pageout() has attempted to clean the page
	 * again.  In this case we do not do anything as we will
	 * see the page again shortly.
	 */
	if (spc->spc_flags & SPC_DIRTY) {
		if (swpagerdebug & SDB_ANOM)
			printf("swap_pager_finish: page %x dirty again\n",
			       spc->spc_m);
		spc->spc_m->flags &= ~PG_BUSY;
		PAGE_WAKEUP(spc->spc_m);
		vm_object_unlock(object);
		return(1);
	}
#endif
	/*
	 * If no error mark as clean and inform the pmap system.
	 * If error, mark as dirty so we will try again.
	 * (XXX could get stuck doing this, should give up after awhile)
	 */
	if (spc->spc_flags & SPC_ERROR) {
		printf("swap_pager_finish: clean of page %x failed\n",
		       VM_PAGE_TO_PHYS(spc->spc_m));
		spc->spc_m->flags |= PG_LAUNDRY;
	} else {
		spc->spc_m->flags |= PG_CLEAN;
		pmap_clear_modify(VM_PAGE_TO_PHYS(spc->spc_m));
	}
	/* release the page to its waiters */
	spc->spc_m->flags &= ~PG_BUSY;
	PAGE_WAKEUP(spc->spc_m);

	vm_object_unlock(object);
	return(1);
}
826 
/*
 * Biodone callback for asynchronous pageouts: find the clean
 * structure for this buf, mark it DONE (and ERROR on failure),
 * return the buf to the swap buffer free list, and wake up anyone
 * waiting on the pager or the buffer pool.  Runs at biodone time;
 * protects itself with splbio.
 */
static void
swap_pager_iodone(bp)
	register struct buf *bp;
{
	register swp_clean_t spc;
	daddr_t blk;
	int s;

#ifdef DEBUG
	/* save panic time state */
	if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
		return;
	if (swpagerdebug & SDB_FOLLOW)
		printf("swpg_iodone(%x)\n", bp);
#endif
	s = splbio();
	/* locate the inuse entry that owns this buf */
	spc = (swp_clean_t) queue_first(&swap_pager_inuse);
	while (!queue_end(&swap_pager_inuse, (queue_entry_t)spc)) {
		if (spc->spc_bp == bp)
			break;
		spc = (swp_clean_t) queue_next(&spc->spc_list);
	}
#ifdef DEBUG
	if (queue_end(&swap_pager_inuse, (queue_entry_t)spc))
		panic("swap_pager_iodone: bp not found");
#endif

	spc->spc_flags &= ~SPC_BUSY;
	spc->spc_flags |= SPC_DONE;
	if (bp->b_flags & B_ERROR)
		spc->spc_flags |= SPC_ERROR;
	spc->spc_bp = NULL;
	blk = bp->b_blkno;

#ifdef DEBUG
	--swap_pager_poip;
	if (swpagerdebug & SDB_WRITE)
		printf("swpg_iodone: bp=%x swp=%x flags=%x spc=%x poip=%x\n",
		       bp, spc->spc_swp, spc->spc_swp->sw_flags,
		       spc, spc->spc_swp->sw_poip);
#endif

	/* one fewer pageout in progress; wake waiters (e.g. dealloc) */
	spc->spc_swp->sw_poip--;
	if (spc->spc_swp->sw_flags & SW_WANTED) {
		spc->spc_swp->sw_flags &= ~SW_WANTED;
		thread_wakeup((int)spc->spc_swp);
	}

	/* give the buf back to the swap buffer free list */
	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY);
	bp->b_actf = bswlist.b_actf;
	bswlist.b_actf = bp;
	if (bp->b_vp)
		brelvp(bp);
	if (bswlist.b_flags & B_WANTED) {
		bswlist.b_flags &= ~B_WANTED;
		thread_wakeup((int)&bswlist);
	}
	/*
	 * Only kick the pageout daemon if we are really hurting
	 * for pages, otherwise this page will be picked up later.
	 */
	if (cnt.v_free_count < cnt.v_free_min)
		thread_wakeup((int) &vm_pages_needed);
	splx(s);
}
892