/*
 * Copyright (c) 1990 University of Utah.
 * Copyright (c) 1991 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * %sccs.include.redist.c%
 *
 * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$
 *
 *	@(#)swap_pager.c	7.6 (Berkeley) 04/20/92
 */

/*
 * Quick hack to page to dedicated partition(s).
 * TODO:
 *	Add multiprocessor locks
 *	Deal with async writes in a better fashion
 */

#include "swappager.h"
#if NSWAPPAGER > 0

#include "param.h"
#include "proc.h"
#include "buf.h"
#include "map.h"
#include "systm.h"
#include "specdev.h"
#include "vnode.h"
#include "malloc.h"
#include "queue.h"

#include "vm.h"
#include "vm_page.h"
#include "vm_pageout.h"
#include "swap_pager.h"

#define NSWSIZES	16	/* size of swtab */
#define NPENDINGIO	64	/* max # of pending cleans */
#define MAXDADDRS	64	/* max # of disk addrs for fixed allocations */

#ifdef DEBUG
int	swpagerdebug = 0x100;
#define	SDB_FOLLOW	0x001
#define SDB_INIT	0x002
#define SDB_ALLOC	0x004
#define SDB_IO		0x008
#define SDB_WRITE	0x010
#define SDB_FAIL	0x020
#define SDB_ALLOCBLK	0x040
#define SDB_FULL	0x080
#define SDB_ANOM	0x100
#define SDB_ANOMPANIC	0x200
#endif

struct swpagerclean {
	queue_head_t		spc_list;
	int			spc_flags;
	struct buf		*spc_bp;
	sw_pager_t		spc_swp;
	vm_offset_t		spc_kva;
	vm_page_t		spc_m;
} swcleanlist[NPENDINGIO];
typedef	struct swpagerclean	*swp_clean_t;

/* spc_flags values */
#define SPC_FREE	0x00
#define SPC_BUSY	0x01
#define SPC_DONE	0x02
#define SPC_ERROR	0x04
#define SPC_DIRTY	0x08

struct swtab {
	vm_size_t st_osize;	/* size of object (bytes) */
	int	  st_bsize;	/* vs. size of swap block (DEV_BSIZE units) */
#ifdef DEBUG
	u_long	  st_inuse;	/* number in this range in use */
	u_long	  st_usecnt;	/* total used of this size */
#endif
} swtab[NSWSIZES+1];
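
#ifdef notdef
/*
 * Illustrative sketch, never compiled: how an object size is mapped to
 * a swtab entry.  The table is ordered by increasing st_osize and is
 * terminated by a zero st_osize sentinel, so a linear scan finds the
 * smallest entry covering the (page-rounded) object size, and the
 * sentinel catches anything larger.  The _sketch name is hypothetical;
 * this is the same lookup swap_pager_alloc() performs inline.
 */
static int
swtab_lookup_sketch(size)
	vm_size_t size;
{
	register struct swtab *swt;

	size = round_page(size);
	for (swt = swtab; swt->st_osize; swt++)
		if (size <= swt->st_osize)
			break;
	return (swt - swtab);	/* index of matching entry */
}
#endif /* notdef */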

#ifdef DEBUG
int		swap_pager_pendingio;	/* max pending async "clean" ops */
int		swap_pager_poip;	/* pageouts in progress */
int		swap_pager_piip;	/* pageins in progress */
#endif

queue_head_t	swap_pager_inuse;	/* list of pending page cleans */
queue_head_t	swap_pager_free;	/* list of free pager clean structs */
queue_head_t	swap_pager_list;	/* list of "named" anon regions */

void
swap_pager_init()
{
	register swp_clean_t spc;
	register int i, bsize;
	extern int dmmin, dmmax;
	int maxbsize;

#ifdef DEBUG
	if (swpagerdebug & (SDB_FOLLOW|SDB_INIT))
		printf("swpg_init()\n");
#endif
	dfltpagerops = &swappagerops;
	queue_init(&swap_pager_list);

	/*
	 * Initialize clean lists
	 */
	queue_init(&swap_pager_inuse);
	queue_init(&swap_pager_free);
	for (i = 0, spc = swcleanlist; i < NPENDINGIO; i++, spc++) {
		queue_enter(&swap_pager_free, spc, swp_clean_t, spc_list);
		spc->spc_flags = SPC_FREE;
	}

	/*
	 * Calculate the swap allocation constants.
	 */
	if (dmmin == 0) {
		dmmin = DMMIN;
		if (dmmin < CLBYTES/DEV_BSIZE)
			dmmin = CLBYTES/DEV_BSIZE;
	}
	if (dmmax == 0)
		dmmax = DMMAX;

	/*
	 * Fill in our table of object size vs. allocation size
	 */
	bsize = btodb(PAGE_SIZE);
	if (bsize < dmmin)
		bsize = dmmin;
	maxbsize = btodb(sizeof(sw_bm_t) * NBBY * PAGE_SIZE);
	if (maxbsize > dmmax)
		maxbsize = dmmax;
	for (i = 0; i < NSWSIZES; i++) {
		swtab[i].st_osize = (vm_size_t) (MAXDADDRS * dbtob(bsize));
		swtab[i].st_bsize = bsize;
#ifdef DEBUG
		if (swpagerdebug & SDB_INIT)
			printf("swpg_init: ix %d, size %x, bsize %x\n",
			       i, swtab[i].st_osize, swtab[i].st_bsize);
#endif
		if (bsize >= maxbsize)
			break;
		bsize *= 2;
	}
	swtab[i].st_osize = 0;
	swtab[i].st_bsize = bsize;
}
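
#ifdef notdef
/*
 * Illustrative sketch, never compiled: the object-size cutoff that the
 * init loop above assigns to table entry ix, assuming PAGE_SIZE == 4096,
 * DEV_BSIZE == 512 and dmmin <= btodb(PAGE_SIZE) so that bsize starts
 * at 8 and doubles per entry.  Entry 0 then covers objects up to
 * MAXDADDRS (64) 4KB swap blocks == 256KB, entry 1 up to 512KB, and so
 * on until bsize reaches min(maxbsize, dmmax).  The _sketch name is
 * hypothetical.
 */
static vm_size_t
swtab_osize_sketch(ix)
	int ix;
{
	register int bsize;

	bsize = btodb(PAGE_SIZE) << ix;
	return ((vm_size_t)(MAXDADDRS * dbtob(bsize)));
}
#endif /* notdef */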

/*
 * Allocate a pager structure and associated resources.
 * Note that if we are called from the pageout daemon (handle == NULL)
 * we should not wait for memory as it could result in deadlock.
 */
vm_pager_t
swap_pager_alloc(handle, size, prot)
	caddr_t handle;
	register vm_size_t size;
	vm_prot_t prot;
{
	register vm_pager_t pager;
	register sw_pager_t swp;
	struct swtab *swt;
	int waitok;

#ifdef DEBUG
	if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOC))
		printf("swpg_alloc(%x, %x, %x)\n", handle, size, prot);
#endif
	/*
	 * If this is a "named" anonymous region, look it up and
	 * return the appropriate pager if it exists.
	 */
	if (handle) {
		pager = vm_pager_lookup(&swap_pager_list, handle);
		if (pager != NULL) {
			/*
			 * Use vm_object_lookup to gain a reference
			 * to the object and also to remove from the
			 * object cache.
			 */
			if (vm_object_lookup(pager) == NULL)
				panic("swap_pager_alloc: bad object");
			return(pager);
		}
	}
	/*
	 * Pager doesn't exist, allocate swap management resources
	 * and initialize.
	 */
	waitok = handle ? M_WAITOK : M_NOWAIT;
	pager = (vm_pager_t)malloc(sizeof *pager, M_VMPAGER, waitok);
	if (pager == NULL)
		return(NULL);
	swp = (sw_pager_t)malloc(sizeof *swp, M_VMPGDATA, waitok);
	if (swp == NULL) {
#ifdef DEBUG
		if (swpagerdebug & SDB_FAIL)
			printf("swpg_alloc: swpager malloc failed\n");
#endif
		free((caddr_t)pager, M_VMPAGER);
		return(NULL);
	}
	size = round_page(size);
	for (swt = swtab; swt->st_osize; swt++)
		if (size <= swt->st_osize)
			break;
#ifdef DEBUG
	swt->st_inuse++;
	swt->st_usecnt++;
#endif
	swp->sw_osize = size;
	swp->sw_bsize = swt->st_bsize;
	swp->sw_nblocks = (btodb(size) + swp->sw_bsize - 1) / swp->sw_bsize;
	swp->sw_blocks = (sw_blk_t)
		malloc(swp->sw_nblocks*sizeof(*swp->sw_blocks),
		       M_VMPGDATA, M_NOWAIT);
	if (swp->sw_blocks == NULL) {
		free((caddr_t)swp, M_VMPGDATA);
		free((caddr_t)pager, M_VMPAGER);
#ifdef DEBUG
		if (swpagerdebug & SDB_FAIL)
			printf("swpg_alloc: sw_blocks malloc failed\n");
		swt->st_inuse--;
		swt->st_usecnt--;
#endif
		return(NULL);
	}
	bzero((caddr_t)swp->sw_blocks,
	      swp->sw_nblocks * sizeof(*swp->sw_blocks));
	swp->sw_poip = 0;
	if (handle) {
		vm_object_t object;

		swp->sw_flags = SW_NAMED;
		queue_enter(&swap_pager_list, pager, vm_pager_t, pg_list);
		/*
		 * Consistent with other pagers: return with object
		 * referenced.  Can't do this with handle == NULL
		 * since it might be the pageout daemon calling.
		 */
		object = vm_object_allocate(size);
		vm_object_enter(object, pager);
		vm_object_setpager(object, pager, 0, FALSE);
	} else {
		swp->sw_flags = 0;
		queue_init(&pager->pg_list);
	}
	pager->pg_handle = handle;
	pager->pg_ops = &swappagerops;
	pager->pg_type = PG_SWAP;
	pager->pg_data = (caddr_t)swp;

#ifdef DEBUG
	if (swpagerdebug & SDB_ALLOC)
		printf("swpg_alloc: pg_data %x, %x of %x at %x\n",
		       swp, swp->sw_nblocks, swp->sw_bsize, swp->sw_blocks);
#endif
	return(pager);
}
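
#ifdef notdef
/*
 * Illustrative sketch, never compiled: the two ways this pager is
 * obtained.  A named allocation (non-NULL handle) may sleep and comes
 * back with the underlying object referenced; the pageout daemon
 * passes handle == NULL and must tolerate a NULL return when memory
 * cannot be had without sleeping.  The _sketch name and the handle
 * value are hypothetical placeholders.
 */
static void
swap_pager_alloc_sketch(handle)
	caddr_t handle;		/* e.g. an object tag; placeholder */
{
	vm_pager_t named, anon;

	/* named region: may wait, object left referenced */
	named = swap_pager_alloc(handle, (vm_size_t)0x10000,
				 VM_PROT_DEFAULT);

	/* pageout daemon: must not wait, check for failure */
	anon = swap_pager_alloc((caddr_t)NULL, (vm_size_t)0x10000,
				VM_PROT_DEFAULT);
	if (anon == NULL)
		;	/* caller backs off and retries later */
}
#endif /* notdef */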

void
swap_pager_dealloc(pager)
	vm_pager_t pager;
{
	register int i;
	register sw_blk_t bp;
	register sw_pager_t swp;
	struct swtab *swt;
	int s;

#ifdef DEBUG
	/* save panic time state */
	if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
		return;
	if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOC))
		printf("swpg_dealloc(%x)\n", pager);
#endif
	/*
	 * Remove from list right away so lookups will fail if we
	 * block for pageout completion.
	 */
	swp = (sw_pager_t) pager->pg_data;
	if (swp->sw_flags & SW_NAMED) {
		queue_remove(&swap_pager_list, pager, vm_pager_t, pg_list);
		swp->sw_flags &= ~SW_NAMED;
	}
#ifdef DEBUG
	for (swt = swtab; swt->st_osize; swt++)
		if (swp->sw_osize <= swt->st_osize)
			break;
	swt->st_inuse--;
#endif

	/*
	 * Wait for all pageouts to finish and remove
	 * all entries from the cleaning list.
	 */
	s = splbio();
	while (swp->sw_poip) {
		swp->sw_flags |= SW_WANTED;
		assert_wait((int)swp);
		thread_block();
	}
	splx(s);
	(void) swap_pager_clean(NULL, B_WRITE);

	/*
	 * Free leftover swap blocks
	 */
	for (i = 0, bp = swp->sw_blocks; i < swp->sw_nblocks; i++, bp++)
		if (bp->swb_block) {
#ifdef DEBUG
			if (swpagerdebug & (SDB_ALLOCBLK|SDB_FULL))
				printf("swpg_dealloc: blk %x\n",
				       bp->swb_block);
#endif
			rmfree(swapmap, swp->sw_bsize, bp->swb_block);
		}
	/*
	 * Free swap management resources
	 */
	free((caddr_t)swp->sw_blocks, M_VMPGDATA);
	free((caddr_t)swp, M_VMPGDATA);
	free((caddr_t)pager, M_VMPAGER);
}

int
swap_pager_getpage(pager, m, sync)
	vm_pager_t pager;
	vm_page_t m;
	boolean_t sync;
{
#ifdef DEBUG
	if (swpagerdebug & SDB_FOLLOW)
		printf("swpg_getpage(%x, %x, %d)\n", pager, m, sync);
#endif
	return(swap_pager_io((sw_pager_t)pager->pg_data, m, B_READ));
}

int
swap_pager_putpage(pager, m, sync)
	vm_pager_t pager;
	vm_page_t m;
	boolean_t sync;
{
	int flags;

#ifdef DEBUG
	if (swpagerdebug & SDB_FOLLOW)
		printf("swpg_putpage(%x, %x, %d)\n", pager, m, sync);
#endif
	if (pager == NULL) {
		(void) swap_pager_clean(NULL, B_WRITE);
		return(VM_PAGER_OK);
	}
	flags = B_WRITE;
	if (!sync)
		flags |= B_ASYNC;
	return(swap_pager_io((sw_pager_t)pager->pg_data, m, flags));
}

boolean_t
swap_pager_haspage(pager, offset)
	vm_pager_t pager;
	vm_offset_t offset;
{
	register sw_pager_t swp;
	register sw_blk_t swb;
	int ix;

#ifdef DEBUG
	if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOCBLK))
		printf("swpg_haspage(%x, %x) ", pager, offset);
#endif
	swp = (sw_pager_t) pager->pg_data;
	ix = offset / dbtob(swp->sw_bsize);
	if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
#ifdef DEBUG
		if (swpagerdebug & (SDB_FAIL|SDB_FOLLOW|SDB_ALLOCBLK))
			printf("swpg_haspage: %x bad offset %x, ix %x\n",
			       swp->sw_blocks, offset, ix);
#endif
		return(FALSE);
	}
	swb = &swp->sw_blocks[ix];
	if (swb->swb_block)
		ix = atop(offset % dbtob(swp->sw_bsize));
#ifdef DEBUG
	if (swpagerdebug & SDB_ALLOCBLK)
		printf("%x blk %x+%x ", swp->sw_blocks, swb->swb_block, ix);
	if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOCBLK))
		printf("-> %c\n",
		       "FT"[swb->swb_block && (swb->swb_mask & (1 << ix))]);
#endif
	if (swb->swb_block && (swb->swb_mask & (1 << ix)))
		return(TRUE);
	return(FALSE);
}
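
#ifdef notdef
/*
 * Illustrative sketch, never compiled: the offset arithmetic used in
 * swap_pager_haspage() above.  Assuming sw_bsize == 32 (16KB swap
 * blocks) and PAGE_SIZE == 4096, an object offset of 0x9000 selects
 * block index 0x9000 / 16384 == 2 and mask bit
 * atop(0x9000 % 16384) == atop(0x1000) == 1, i.e. the second page
 * within that swap block.  The _sketch name is hypothetical.
 */
static boolean_t
swap_pager_haspage_sketch(swp, offset)
	sw_pager_t swp;
	vm_offset_t offset;
{
	register sw_blk_t swb;
	register int bit;

	swb = &swp->sw_blocks[offset / dbtob(swp->sw_bsize)];
	bit = atop(offset % dbtob(swp->sw_bsize));
	return (swb->swb_block && (swb->swb_mask & (1 << bit)) ?
	    TRUE : FALSE);
}
#endif /* notdef */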

/*
 * Scaled down version of swap().
 * Assumes that PAGE_SIZE < MAXPHYS; i.e. only one operation needed.
 * BOGUS:  lower level IO routines expect a KVA so we have to map our
 * provided physical page into the KVA to keep them happy.
 */
int
swap_pager_io(swp, m, flags)
	register sw_pager_t swp;
	vm_page_t m;
	int flags;
{
	register struct buf *bp;
	register sw_blk_t swb;
	register int s;
	int ix;
	int rv;
	vm_offset_t kva, off;
	swp_clean_t spc;

#ifdef DEBUG
	/* save panic time state */
	if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
		return(VM_PAGER_FAIL);
	if (swpagerdebug & (SDB_FOLLOW|SDB_IO))
		printf("swpg_io(%x, %x, %x)\n", swp, m, flags);
#endif

	/*
	 * For reads (pageins) and synchronous writes, we clean up
	 * all completed async pageouts.
	 */
	if ((flags & B_ASYNC) == 0) {
		s = splbio();
#ifdef DEBUG
		/*
		 * Check to see if this page is currently being cleaned.
		 * If it is, we just wait until the operation is done
		 * before continuing.
		 */
		while (swap_pager_clean(m, flags&B_READ)) {
			if (swpagerdebug & SDB_ANOM)
				printf("swap_pager_io: page %x cleaning\n", m);

			swp->sw_flags |= SW_WANTED;
			assert_wait((int)swp);
			thread_block();
		}
#else
		(void) swap_pager_clean(m, flags&B_READ);
#endif
		splx(s);
	}
	/*
	 * For async writes (pageouts), we clean up completed pageouts so
	 * that all available resources are freed.  This also tells us if
	 * the page is already being cleaned.  If it is, or if no
	 * resources are available, we try again later.
	 */
	else if (swap_pager_clean(m, B_WRITE) ||
		 queue_empty(&swap_pager_free)) {
#ifdef DEBUG
		if ((swpagerdebug & SDB_ANOM) &&
		    !queue_empty(&swap_pager_free))
			printf("swap_pager_io: page %x already cleaning\n", m);
#endif
		return(VM_PAGER_FAIL);
	}

	/*
	 * Determine swap block and allocate as necessary.
	 */
	off = m->offset + m->object->paging_offset;
	ix = off / dbtob(swp->sw_bsize);
	if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
#ifdef DEBUG
		if (swpagerdebug & SDB_FAIL)
			printf("swpg_io: bad offset %x+%x(%d) in %x\n",
			       m->offset, m->object->paging_offset,
			       ix, swp->sw_blocks);
#endif
		return(VM_PAGER_FAIL);
	}
	swb = &swp->sw_blocks[ix];
	off = off % dbtob(swp->sw_bsize);
	if (flags & B_READ) {
		if (swb->swb_block == 0 ||
		    (swb->swb_mask & (1 << atop(off))) == 0) {
#ifdef DEBUG
			if (swpagerdebug & (SDB_ALLOCBLK|SDB_FAIL))
				printf("swpg_io: %x bad read: blk %x+%x, mask %x, off %x+%x\n",
				       swp->sw_blocks,
				       swb->swb_block, atop(off),
				       swb->swb_mask,
				       m->offset, m->object->paging_offset);
#endif
			/* XXX: should we zero page here?? */
			return(VM_PAGER_FAIL);
		}
	} else if (swb->swb_block == 0) {
		swb->swb_block = rmalloc(swapmap, swp->sw_bsize);
		if (swb->swb_block == 0) {
#ifdef DEBUG
			if (swpagerdebug & SDB_FAIL)
				printf("swpg_io: rmalloc of %x failed\n",
				       swp->sw_bsize);
#endif
			return(VM_PAGER_FAIL);
		}
#ifdef DEBUG
		if (swpagerdebug & (SDB_FULL|SDB_ALLOCBLK))
			printf("swpg_io: %x alloc blk %x at ix %x\n",
			       swp->sw_blocks, swb->swb_block, ix);
#endif
	}

	/*
	 * Allocate a kernel virtual address and initialize so that PTE
	 * is available for lower level IO drivers.
	 */
	kva = vm_pager_map_page(m);

	/*
	 * Get a swap buffer header and perform the IO
	 */
	s = splbio();
	while (bswlist.av_forw == NULL) {
#ifdef DEBUG
		if (swpagerdebug & SDB_ANOM)
			printf("swap_pager_io: wait on swbuf for %x (%d)\n",
			       m, flags);
#endif
		bswlist.b_flags |= B_WANTED;
		sleep((caddr_t)&bswlist, PSWP+1);
	}
	bp = bswlist.av_forw;
	bswlist.av_forw = bp->av_forw;
	splx(s);
	bp->b_flags = B_BUSY | (flags & B_READ);
	bp->b_proc = &proc0;	/* XXX (but without B_PHYS set this is ok) */
	bp->b_un.b_addr = (caddr_t)kva;
	bp->b_blkno = swb->swb_block + btodb(off);
	VHOLD(swapdev_vp);
	bp->b_vp = swapdev_vp;
	if (swapdev_vp->v_type == VBLK)
		bp->b_dev = swapdev_vp->v_rdev;
	bp->b_bcount = PAGE_SIZE;
	if ((bp->b_flags & B_READ) == 0) {
		bp->b_dirtyoff = 0;
		bp->b_dirtyend = PAGE_SIZE;
		swapdev_vp->v_numoutput++;
	}

	/*
	 * If this is an async write we set up additional buffer fields
	 * and place a "cleaning" entry on the inuse queue.
	 */
	if ((flags & (B_READ|B_ASYNC)) == B_ASYNC) {
#ifdef DEBUG
		if (queue_empty(&swap_pager_free))
			panic("swpg_io: lost spc");
#endif
		queue_remove_first(&swap_pager_free,
				   spc, swp_clean_t, spc_list);
#ifdef DEBUG
		if (spc->spc_flags != SPC_FREE)
			panic("swpg_io: bad free spc");
#endif
		spc->spc_flags = SPC_BUSY;
		spc->spc_bp = bp;
		spc->spc_swp = swp;
		spc->spc_kva = kva;
		spc->spc_m = m;
		bp->b_flags |= B_CALL;
		bp->b_iodone = swap_pager_iodone;
		s = splbio();
		swp->sw_poip++;
		queue_enter(&swap_pager_inuse, spc, swp_clean_t, spc_list);

#ifdef DEBUG
		swap_pager_poip++;
		if (swpagerdebug & SDB_WRITE)
			printf("swpg_io: write: bp=%x swp=%x spc=%x poip=%d\n",
			       bp, swp, spc, swp->sw_poip);
		if ((swpagerdebug & SDB_ALLOCBLK) &&
		    (swb->swb_mask & (1 << atop(off))) == 0)
			printf("swpg_io: %x write blk %x+%x\n",
			       swp->sw_blocks, swb->swb_block, atop(off));
#endif
		swb->swb_mask |= (1 << atop(off));
		splx(s);
	}
#ifdef DEBUG
	if (swpagerdebug & SDB_IO)
		printf("swpg_io: IO start: bp %x, db %x, va %x, pa %x\n",
		       bp, swb->swb_block+btodb(off), kva, VM_PAGE_TO_PHYS(m));
#endif
	VOP_STRATEGY(bp);
	if ((flags & (B_READ|B_ASYNC)) == B_ASYNC) {
#ifdef DEBUG
		if (swpagerdebug & SDB_IO)
			printf("swpg_io:  IO started: bp %x\n", bp);
#endif
		return(VM_PAGER_PEND);
	}
	s = splbio();
#ifdef DEBUG
	if (flags & B_READ)
		swap_pager_piip++;
	else
		swap_pager_poip++;
#endif
	while ((bp->b_flags & B_DONE) == 0) {
		assert_wait((int)bp);
		thread_block();
	}
#ifdef DEBUG
	if (flags & B_READ)
		--swap_pager_piip;
	else
		--swap_pager_poip;
#endif
	rv = (bp->b_flags & B_ERROR) ? VM_PAGER_FAIL : VM_PAGER_OK;
	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY);
	bp->av_forw = bswlist.av_forw;
	bswlist.av_forw = bp;
	if (bp->b_vp)
		brelvp(bp);
	if (bswlist.b_flags & B_WANTED) {
		bswlist.b_flags &= ~B_WANTED;
		thread_wakeup((int)&bswlist);
	}
	if ((flags & B_READ) == 0 && rv == VM_PAGER_OK) {
		m->clean = TRUE;
		pmap_clear_modify(VM_PAGE_TO_PHYS(m));
	}
	splx(s);
#ifdef DEBUG
	if (swpagerdebug & SDB_IO)
		printf("swpg_io:  IO done: bp %x, rv %d\n", bp, rv);
	if ((swpagerdebug & SDB_FAIL) && rv == VM_PAGER_FAIL)
		printf("swpg_io: IO error\n");
#endif
	vm_pager_unmap_page(kva);
	return(rv);
}
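
#ifdef notdef
/*
 * Illustrative sketch, never compiled: the swap-buffer checkout
 * protocol used in swap_pager_io() and unwound again after IO
 * completes.  bswlist heads a free list of statically allocated buf
 * headers; takers sleep on the list head with B_WANTED set when it is
 * empty, and whoever returns a header issues the wakeup.  All list
 * manipulation is done at splbio().  The _sketch name is hypothetical.
 */
static struct buf *
swbuf_get_sketch()
{
	register struct buf *bp;
	int s;

	s = splbio();
	while ((bp = bswlist.av_forw) == NULL) {
		bswlist.b_flags |= B_WANTED;
		sleep((caddr_t)&bswlist, PSWP+1);
	}
	bswlist.av_forw = bp->av_forw;
	splx(s);
	return (bp);
}
#endif /* notdef */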

boolean_t
swap_pager_clean(m, rw)
	vm_page_t m;
	int rw;
{
	register swp_clean_t spc, tspc;
	register int s;

#ifdef DEBUG
	/* save panic time state */
	if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
		return(FALSE);
	if (swpagerdebug & SDB_FOLLOW)
		printf("swpg_clean(%x, %d)\n", m, rw);
#endif
	tspc = NULL;
	for (;;) {
		/*
		 * Lookup and removal from the inuse list must be done
		 * at splbio() to avoid conflicts with swap_pager_iodone.
		 */
		s = splbio();
		spc = (swp_clean_t) queue_first(&swap_pager_inuse);
		while (!queue_end(&swap_pager_inuse, (queue_entry_t)spc)) {
			if ((spc->spc_flags & SPC_DONE) &&
			    swap_pager_finish(spc)) {
				queue_remove(&swap_pager_inuse, spc,
					     swp_clean_t, spc_list);
				break;
			}
			if (m && m == spc->spc_m) {
#ifdef DEBUG
				if (swpagerdebug & SDB_ANOM)
					printf("swap_pager_clean: page %x on list, flags %x\n",
					       m, spc->spc_flags);
#endif
				tspc = spc;
			}
			spc = (swp_clean_t) queue_next(&spc->spc_list);
		}

		/*
		 * No operations done, that's all we can do for now.
		 */
		if (queue_end(&swap_pager_inuse, (queue_entry_t)spc))
			break;
		splx(s);

		/*
		 * The desired page was found to be busy earlier in
		 * the scan but has since completed.
		 */
		if (tspc && tspc == spc) {
#ifdef DEBUG
			if (swpagerdebug & SDB_ANOM)
				printf("swap_pager_clean: page %x done while looking\n",
				       m);
#endif
			tspc = NULL;
		}
		spc->spc_flags = SPC_FREE;
		vm_pager_unmap_page(spc->spc_kva);
		queue_enter(&swap_pager_free, spc, swp_clean_t, spc_list);
#ifdef DEBUG
		if (swpagerdebug & SDB_WRITE)
			printf("swpg_clean: free spc %x\n", spc);
#endif
	}
#ifdef DEBUG
	/*
	 * If we found that the desired page is already being cleaned,
	 * mark it so that swap_pager_iodone() will not set the clean
	 * flag before the pageout daemon has another chance to clean it.
	 */
	if (tspc && rw == B_WRITE) {
		if (swpagerdebug & SDB_ANOM)
			printf("swap_pager_clean: page %x on clean list\n",
			       tspc);
		tspc->spc_flags |= SPC_DIRTY;
	}
#endif
	splx(s);

#ifdef DEBUG
	if (swpagerdebug & SDB_WRITE)
		printf("swpg_clean: return %d\n", tspc ? TRUE : FALSE);
	if ((swpagerdebug & SDB_ANOM) && tspc)
		printf("swpg_clean: %s of cleaning page %x\n",
		       rw == B_READ ? "get" : "put", m);
#endif
	return(tspc ? TRUE : FALSE);
}
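
#ifdef notdef
/*
 * Illustrative sketch, never compiled: the two ways swap_pager_clean()
 * is used.  With m == NULL it simply reaps every completed async
 * pageout (as swap_pager_dealloc() and swap_pager_putpage() do); with
 * a specific page it additionally reports whether that page is still
 * on the inuse list, so callers can wait for, or back off from, an
 * in-flight clean.  The _sketch name is hypothetical.
 */
static void
swap_pager_clean_sketch(m)
	vm_page_t m;
{
	(void) swap_pager_clean(NULL, B_WRITE);	/* reap all completions */
	if (swap_pager_clean(m, B_WRITE))	/* this page still busy? */
		;	/* caller retries later */
}
#endif /* notdef */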

int
swap_pager_finish(spc)
	register swp_clean_t spc;
{
	vm_object_t object = spc->spc_m->object;

	/*
	 * Mark the paging operation as done.
	 * (XXX) If we cannot get the lock, leave it until later.
	 * (XXX) Also we are assuming that an async write is a
	 *       pageout operation that has incremented the counter.
	 */
	if (!vm_object_lock_try(object))
		return(0);

	if (--object->paging_in_progress == 0)
		thread_wakeup((int) object);

#ifdef DEBUG
	/*
	 * XXX: this isn't even close to the right thing to do,
	 * introduces a variety of race conditions.
	 *
	 * If dirty, vm_pageout() has attempted to clean the page
	 * again.  In this case we do not do anything as we will
	 * see the page again shortly.
	 */
	if (spc->spc_flags & SPC_DIRTY) {
		if (swpagerdebug & SDB_ANOM)
			printf("swap_pager_finish: page %x dirty again\n",
			       spc->spc_m);
		spc->spc_m->busy = FALSE;
		PAGE_WAKEUP(spc->spc_m);
		vm_object_unlock(object);
		return(1);
	}
#endif
	/*
	 * If no error, mark as clean and inform the pmap system.
	 * If error, mark as dirty so we will try again.
	 * (XXX could get stuck doing this, should give up after a while)
	 */
	if (spc->spc_flags & SPC_ERROR) {
		printf("swap_pager_finish: clean of page %x failed\n",
		       VM_PAGE_TO_PHYS(spc->spc_m));
		spc->spc_m->laundry = TRUE;
	} else {
		spc->spc_m->clean = TRUE;
		pmap_clear_modify(VM_PAGE_TO_PHYS(spc->spc_m));
	}
	spc->spc_m->busy = FALSE;
	PAGE_WAKEUP(spc->spc_m);

	vm_object_unlock(object);
	return(1);
}

void
swap_pager_iodone(bp)
	register struct buf *bp;
{
	register swp_clean_t spc;
	daddr_t blk;
	int s;

#ifdef DEBUG
	/* save panic time state */
	if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
		return;
	if (swpagerdebug & SDB_FOLLOW)
		printf("swpg_iodone(%x)\n", bp);
#endif
	s = splbio();
	spc = (swp_clean_t) queue_first(&swap_pager_inuse);
	while (!queue_end(&swap_pager_inuse, (queue_entry_t)spc)) {
		if (spc->spc_bp == bp)
			break;
		spc = (swp_clean_t) queue_next(&spc->spc_list);
	}
#ifdef DEBUG
	if (queue_end(&swap_pager_inuse, (queue_entry_t)spc))
		panic("swap_pager_iodone: bp not found");
#endif

	spc->spc_flags &= ~SPC_BUSY;
	spc->spc_flags |= SPC_DONE;
	if (bp->b_flags & B_ERROR)
		spc->spc_flags |= SPC_ERROR;
	spc->spc_bp = NULL;
	blk = bp->b_blkno;

#ifdef DEBUG
	--swap_pager_poip;
	if (swpagerdebug & SDB_WRITE)
		printf("swpg_iodone: bp=%x swp=%x flags=%x spc=%x poip=%x\n",
		       bp, spc->spc_swp, spc->spc_swp->sw_flags,
		       spc, spc->spc_swp->sw_poip);
#endif

	spc->spc_swp->sw_poip--;
	if (spc->spc_swp->sw_flags & SW_WANTED) {
		spc->spc_swp->sw_flags &= ~SW_WANTED;
		thread_wakeup((int)spc->spc_swp);
	}

	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY);
	bp->av_forw = bswlist.av_forw;
	bswlist.av_forw = bp;
	if (bp->b_vp)
		brelvp(bp);
	if (bswlist.b_flags & B_WANTED) {
		bswlist.b_flags &= ~B_WANTED;
		thread_wakeup((int)&bswlist);
	}
	thread_wakeup((int) &vm_pages_needed);
	splx(s);
}
#endif /* NSWAPPAGER > 0 */