xref: /dflybsd-src/sys/vm/vm_swapcache.c (revision c54b5d8d8e1af1fe69da36b12d2ba79580a9ce55)
1 /*
2  * (MPSAFE)
3  *
4  * Copyright (c) 2010 The DragonFly Project.  All rights reserved.
5  *
6  * This code is derived from software contributed to The DragonFly Project
7  * by Matthew Dillon <dillon@backplane.com>
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  *
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in
17  *    the documentation and/or other materials provided with the
18  *    distribution.
19  * 3. Neither the name of The DragonFly Project nor the names of its
20  *    contributors may be used to endorse or promote products derived
21  *    from this software without specific, prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
26  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
27  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
28  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
29  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
30  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
31  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
32  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
33  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  */
36 
37 /*
38  * Implement the swapcache daemon.  When enabled swap is assumed to be
39  * configured on a fast storage device such as a SSD.  Swap is assigned
40  * to clean vnode-backed pages in the inactive queue, clustered by object
41  * if possible, and written out.  The swap assignment sticks around even
42  * after the underlying pages have been recycled.
43  *
44  * The daemon manages write bandwidth based on sysctl settings to control
45  * wear on the SSD.
46  *
47  * The vnode strategy code will check for the swap assignments and divert
48  * reads to the swap device when the data is present in the swapcache.
49  *
50  * This operates on both regular files and the block device vnodes used by
51  * filesystems to manage meta-data.
52  */
53 
54 #include "opt_vm.h"
55 #include <sys/param.h>
56 #include <sys/systm.h>
57 #include <sys/kernel.h>
58 #include <sys/proc.h>
59 #include <sys/kthread.h>
60 #include <sys/resourcevar.h>
61 #include <sys/signalvar.h>
62 #include <sys/vnode.h>
63 #include <sys/vmmeter.h>
64 #include <sys/sysctl.h>
65 #include <sys/eventhandler.h>
66 
67 #include <vm/vm.h>
68 #include <vm/vm_param.h>
69 #include <sys/lock.h>
70 #include <vm/vm_object.h>
71 #include <vm/vm_page.h>
72 #include <vm/vm_map.h>
73 #include <vm/vm_pageout.h>
74 #include <vm/vm_pager.h>
75 #include <vm/swap_pager.h>
76 #include <vm/vm_extern.h>
77 
78 #include <sys/thread2.h>
79 #include <vm/vm_page2.h>
80 
81 #define INACTIVE_LIST	(&vm_page_queues[PQ_INACTIVE].pl)
82 
83 /* the kernel process "vm_pageout"*/
84 static int vm_swapcached_flush (vm_page_t m, int isblkdev);
85 static int vm_swapcache_test(vm_page_t m);
86 static void vm_swapcache_writing(vm_page_t marker);
87 static void vm_swapcache_cleaning(vm_object_t marker);
88 struct thread *swapcached_thread;
89 
90 SYSCTL_NODE(_vm, OID_AUTO, swapcache, CTLFLAG_RW, NULL, NULL);
91 
92 int vm_swapcache_read_enable;
93 int vm_swapcache_inactive_heuristic;
94 static int vm_swapcache_sleep;
95 static int vm_swapcache_maxlaunder = 256;
96 static int vm_swapcache_data_enable = 0;
97 static int vm_swapcache_meta_enable = 0;
98 static int vm_swapcache_maxswappct = 75;
99 static int vm_swapcache_hysteresis;
100 int vm_swapcache_use_chflags = 1;	/* require chflags cache */
101 static int64_t vm_swapcache_minburst = 10000000LL;	/* 10MB */
102 static int64_t vm_swapcache_curburst = 4000000000LL;	/* 4G after boot */
103 static int64_t vm_swapcache_maxburst = 2000000000LL;	/* 2G nominal max */
104 static int64_t vm_swapcache_accrate = 100000LL;		/* 100K/s */
105 static int64_t vm_swapcache_write_count;
106 static int64_t vm_swapcache_maxfilesize;
107 
108 SYSCTL_INT(_vm_swapcache, OID_AUTO, maxlaunder,
109 	CTLFLAG_RW, &vm_swapcache_maxlaunder, 0, "");
110 
111 SYSCTL_INT(_vm_swapcache, OID_AUTO, data_enable,
112 	CTLFLAG_RW, &vm_swapcache_data_enable, 0, "");
113 SYSCTL_INT(_vm_swapcache, OID_AUTO, meta_enable,
114 	CTLFLAG_RW, &vm_swapcache_meta_enable, 0, "");
115 SYSCTL_INT(_vm_swapcache, OID_AUTO, read_enable,
116 	CTLFLAG_RW, &vm_swapcache_read_enable, 0, "");
117 SYSCTL_INT(_vm_swapcache, OID_AUTO, maxswappct,
118 	CTLFLAG_RW, &vm_swapcache_maxswappct, 0, "");
119 SYSCTL_INT(_vm_swapcache, OID_AUTO, hysteresis,
120 	CTLFLAG_RW, &vm_swapcache_hysteresis, 0, "");
121 SYSCTL_INT(_vm_swapcache, OID_AUTO, use_chflags,
122 	CTLFLAG_RW, &vm_swapcache_use_chflags, 0, "");
123 
124 SYSCTL_QUAD(_vm_swapcache, OID_AUTO, minburst,
125 	CTLFLAG_RW, &vm_swapcache_minburst, 0, "");
126 SYSCTL_QUAD(_vm_swapcache, OID_AUTO, curburst,
127 	CTLFLAG_RW, &vm_swapcache_curburst, 0, "");
128 SYSCTL_QUAD(_vm_swapcache, OID_AUTO, maxburst,
129 	CTLFLAG_RW, &vm_swapcache_maxburst, 0, "");
130 SYSCTL_QUAD(_vm_swapcache, OID_AUTO, maxfilesize,
131 	CTLFLAG_RW, &vm_swapcache_maxfilesize, 0, "");
132 SYSCTL_QUAD(_vm_swapcache, OID_AUTO, accrate,
133 	CTLFLAG_RW, &vm_swapcache_accrate, 0, "");
134 SYSCTL_QUAD(_vm_swapcache, OID_AUTO, write_count,
135 	CTLFLAG_RW, &vm_swapcache_write_count, 0, "");
136 
/*
 * Swapcache usage threshold, in the units of vm_swap_max: the configured
 * maxswappct percentage of total swap, with (adj) added to the percentage
 * before scaling.  The product is widened to 64 bits before the divide.
 */
#define SWAPMAX(adj)							\
	(((int64_t)vm_swap_max * (vm_swapcache_maxswappct + (adj))) / 100)
139 
140 /*
141  * When shutting down the machine we want to stop swapcache operation
142  * immediately so swap is not accessed after devices have been shuttered.
143  */
144 static void
145 shutdown_swapcache(void *arg __unused)
146 {
147 	vm_swapcache_read_enable = 0;
148 	vm_swapcache_data_enable = 0;
149 	vm_swapcache_meta_enable = 0;
150 	wakeup(&vm_swapcache_sleep);	/* shortcut 5-second wait */
151 }
152 
153 /*
154  * vm_swapcached is the high level pageout daemon.
155  *
156  * No requirements.
157  */
158 static void
159 vm_swapcached_thread(void)
160 {
161 	enum { SWAPC_WRITING, SWAPC_CLEANING } state = SWAPC_WRITING;
162 	enum { SWAPB_BURSTING, SWAPB_RECOVERING } burst = SWAPB_BURSTING;
163 	struct vm_page page_marker;
164 	struct vm_object object_marker;
165 
166 	/*
167 	 * Thread setup
168 	 */
169 	curthread->td_flags |= TDF_SYSTHREAD;
170 	EVENTHANDLER_REGISTER(shutdown_pre_sync, shutdown_kproc,
171 			      swapcached_thread, SHUTDOWN_PRI_FIRST);
172 	EVENTHANDLER_REGISTER(shutdown_pre_sync, shutdown_swapcache,
173 			      NULL, SHUTDOWN_PRI_SECOND);
174 	lwkt_gettoken(&vm_token);
175 
176 	/*
177 	 * Initialize our marker for the inactive scan (SWAPC_WRITING)
178 	 */
179 	bzero(&page_marker, sizeof(page_marker));
180 	page_marker.flags = PG_BUSY | PG_FICTITIOUS | PG_MARKER;
181 	page_marker.queue = PQ_INACTIVE;
182 	page_marker.wire_count = 1;
183 	TAILQ_INSERT_HEAD(INACTIVE_LIST, &page_marker, pageq);
184 	vm_swapcache_hysteresis = vmstats.v_inactive_target / 2;
185 	vm_swapcache_inactive_heuristic = -vm_swapcache_hysteresis;
186 
187 	/*
188 	 * Initialize our marker for the vm_object scan (SWAPC_CLEANING)
189 	 */
190 	bzero(&object_marker, sizeof(object_marker));
191 	object_marker.type = OBJT_MARKER;
192 	lwkt_gettoken(&vmobj_token);
193 	TAILQ_INSERT_HEAD(&vm_object_list, &object_marker, object_list);
194 	lwkt_reltoken(&vmobj_token);
195 
196 	for (;;) {
197 		/*
198 		 * Handle shutdown
199 		 */
200 		kproc_suspend_loop();
201 
202 		/*
203 		 * Check every 5 seconds when not enabled or if no swap
204 		 * is present.
205 		 */
206 		if ((vm_swapcache_data_enable == 0 &&
207 		     vm_swapcache_meta_enable == 0) ||
208 		    vm_swap_max == 0) {
209 			tsleep(&vm_swapcache_sleep, 0, "csleep", hz * 5);
210 			continue;
211 		}
212 
213 		/*
214 		 * Polling rate when enabled is approximately 10 hz.
215 		 */
216 		tsleep(&vm_swapcache_sleep, 0, "csleep", hz / 10);
217 
218 		/*
219 		 * State hysteresis.  Generate write activity up to 75% of
220 		 * swap, then clean out swap assignments down to 70%, then
221 		 * repeat.
222 		 */
223 		if (state == SWAPC_WRITING) {
224 			if (vm_swap_cache_use > SWAPMAX(0))
225 				state = SWAPC_CLEANING;
226 		} else {
227 			if (vm_swap_cache_use < SWAPMAX(-5))
228 				state = SWAPC_WRITING;
229 		}
230 
231 		/*
232 		 * We are allowed to continue accumulating burst value
233 		 * in either state.  Allow the user to set curburst > maxburst
234 		 * for the initial load-in.
235 		 */
236 		if (vm_swapcache_curburst < vm_swapcache_maxburst) {
237 			vm_swapcache_curburst += vm_swapcache_accrate / 10;
238 			if (vm_swapcache_curburst > vm_swapcache_maxburst)
239 				vm_swapcache_curburst = vm_swapcache_maxburst;
240 		}
241 
242 		/*
243 		 * We don't want to nickle-and-dime the scan as that will
244 		 * create unnecessary fragmentation.  The minimum burst
245 		 * is one-seconds worth of accumulation.
246 		 */
247 		if (state == SWAPC_WRITING) {
248 			if (vm_swapcache_curburst >= vm_swapcache_accrate) {
249 				if (burst == SWAPB_BURSTING) {
250 					vm_swapcache_writing(&page_marker);
251 					if (vm_swapcache_curburst <= 0)
252 						burst = SWAPB_RECOVERING;
253 				} else if (vm_swapcache_curburst >
254 					   vm_swapcache_minburst) {
255 					vm_swapcache_writing(&page_marker);
256 					burst = SWAPB_BURSTING;
257 				}
258 			}
259 		} else {
260 			vm_swapcache_cleaning(&object_marker);
261 		}
262 	}
263 
264 	/*
265 	 * Cleanup (NOT REACHED)
266 	 */
267 	TAILQ_REMOVE(INACTIVE_LIST, &page_marker, pageq);
268 	lwkt_reltoken(&vm_token);
269 
270 	lwkt_gettoken(&vmobj_token);
271 	TAILQ_REMOVE(&vm_object_list, &object_marker, object_list);
272 	lwkt_reltoken(&vmobj_token);
273 }
274 
275 static struct kproc_desc swpc_kp = {
276 	"swapcached",
277 	vm_swapcached_thread,
278 	&swapcached_thread
279 };
280 SYSINIT(swapcached, SI_SUB_KTHREAD_PAGE, SI_ORDER_SECOND, kproc_start, &swpc_kp)
281 
282 /*
283  * The caller must hold vm_token.
284  */
285 static void
286 vm_swapcache_writing(vm_page_t marker)
287 {
288 	vm_object_t object;
289 	struct vnode *vp;
290 	vm_page_t m;
291 	int count;
292 	int isblkdev;
293 
294 	/*
295 	 * Deal with an overflow of the heuristic counter or if the user
296 	 * manually changes the hysteresis.
297 	 *
298 	 * Try to avoid small incremental pageouts by waiting for enough
299 	 * pages to buildup in the inactive queue to hopefully get a good
300 	 * burst in.  This heuristic is bumped by the VM system and reset
301 	 * when our scan hits the end of the queue.
302 	 */
303 	if (vm_swapcache_inactive_heuristic < -vm_swapcache_hysteresis)
304 		vm_swapcache_inactive_heuristic = -vm_swapcache_hysteresis;
305 	if (vm_swapcache_inactive_heuristic < 0)
306 		return;
307 
308 	/*
309 	 * Scan the inactive queue from our marker to locate
310 	 * suitable pages to push to the swap cache.
311 	 *
312 	 * We are looking for clean vnode-backed pages.
313 	 *
314 	 * NOTE: PG_SWAPPED pages in particular are not part of
315 	 *	 our count because once the cache stabilizes we
316 	 *	 can end up with a very high datarate of VM pages
317 	 *	 cycling from it.
318 	 */
319 	m = marker;
320 	count = vm_swapcache_maxlaunder;
321 
322 	while ((m = TAILQ_NEXT(m, pageq)) != NULL && count--) {
323 		if (m->flags & (PG_MARKER | PG_SWAPPED)) {
324 			++count;
325 			continue;
326 		}
327 		if (vm_swapcache_curburst < 0)
328 			break;
329 		if (vm_swapcache_test(m))
330 			continue;
331 		object = m->object;
332 		vp = object->handle;
333 		if (vp == NULL)
334 			continue;
335 
336 		switch(vp->v_type) {
337 		case VREG:
338 			/*
339 			 * PG_NOTMETA generically means 'don't swapcache this',
340 			 * and HAMMER will set this for regular data buffers
341 			 * (and leave it unset for meta-data buffers) as
342 			 * appropriate when double buffering is enabled.
343 			 */
344 			if (m->flags & PG_NOTMETA)
345 				continue;
346 
347 			/*
348 			 * If data_enable is 0 do not try to swapcache data.
349 			 * If use_chflags is set then only swapcache data for
350 			 * VSWAPCACHE marked vnodes, otherwise any vnode.
351 			 */
352 			if (vm_swapcache_data_enable == 0 ||
353 			    ((vp->v_flag & VSWAPCACHE) == 0 &&
354 			     vm_swapcache_use_chflags)) {
355 				continue;
356 			}
357 			if (vm_swapcache_maxfilesize &&
358 			    object->size >
359 			    (vm_swapcache_maxfilesize >> PAGE_SHIFT)) {
360 				continue;
361 			}
362 			isblkdev = 0;
363 			break;
364 		case VCHR:
365 			/*
366 			 * PG_NOTMETA generically means 'don't swapcache this',
367 			 * and HAMMER will set this for regular data buffers
368 			 * (and leave it unset for meta-data buffers) as
369 			 * appropriate when double buffering is enabled.
370 			 */
371 			if (m->flags & PG_NOTMETA)
372 				continue;
373 			if (vm_swapcache_meta_enable == 0)
374 				continue;
375 			isblkdev = 1;
376 			break;
377 		default:
378 			continue;
379 		}
380 
381 		/*
382 		 * Ok, move the marker and soft-busy the page.
383 		 */
384 		TAILQ_REMOVE(INACTIVE_LIST, marker, pageq);
385 		TAILQ_INSERT_AFTER(INACTIVE_LIST, m, marker, pageq);
386 
387 		/*
388 		 * Assign swap and initiate I/O.
389 		 *
390 		 * (adjust for the --count which also occurs in the loop)
391 		 */
392 		count -= vm_swapcached_flush(m, isblkdev) - 1;
393 
394 		/*
395 		 * Setup for next loop using marker.
396 		 */
397 		m = marker;
398 	}
399 
400 	/*
401 	 * Cleanup marker position.  If we hit the end of the
402 	 * list the marker is placed at the tail.  Newly deactivated
403 	 * pages will be placed after it.
404 	 *
405 	 * Earlier inactive pages that were dirty and become clean
406 	 * are typically moved to the end of PQ_INACTIVE by virtue
407 	 * of vfs_vmio_release() when they become unwired from the
408 	 * buffer cache.
409 	 */
410 	TAILQ_REMOVE(INACTIVE_LIST, marker, pageq);
411 	if (m) {
412 		TAILQ_INSERT_BEFORE(m, marker, pageq);
413 	} else {
414 		TAILQ_INSERT_TAIL(INACTIVE_LIST, marker, pageq);
415 		vm_swapcache_inactive_heuristic = -vm_swapcache_hysteresis;
416 	}
417 }
418 
419 /*
420  * Flush the specified page using the swap_pager.
421  *
422  * Try to collect surrounding pages, including pages which may
423  * have already been assigned swap.  Try to cluster within a
424  * contiguous aligned SMAP_META_PAGES (typ 16 x PAGE_SIZE) block
425  * to match what swap_pager_putpages() can do.
426  *
427  * We also want to try to match against the buffer cache blocksize
428  * but we don't really know what it is here.  Since the buffer cache
429  * wires and unwires pages in groups the fact that we skip wired pages
430  * should be sufficient.
431  *
432  * Returns a count of pages we might have flushed (minimum 1)
433  *
434  * The caller must hold vm_token.
435  */
436 static
437 int
438 vm_swapcached_flush(vm_page_t m, int isblkdev)
439 {
440 	vm_object_t object;
441 	vm_page_t marray[SWAP_META_PAGES];
442 	vm_pindex_t basei;
443 	int rtvals[SWAP_META_PAGES];
444 	int x;
445 	int i;
446 	int j;
447 	int count;
448 
449 	vm_page_io_start(m);
450 	vm_page_protect(m, VM_PROT_READ);
451 	object = m->object;
452 
453 	/*
454 	 * Try to cluster around (m), keeping in mind that the swap pager
455 	 * can only do SMAP_META_PAGES worth of continguous write.
456 	 */
457 	x = (int)m->pindex & SWAP_META_MASK;
458 	marray[x] = m;
459 	basei = m->pindex;
460 
461 	for (i = x - 1; i >= 0; --i) {
462 		m = vm_page_lookup(object, basei - x + i);
463 		if (m == NULL)
464 			break;
465 		if (vm_swapcache_test(m))
466 			break;
467 		if (isblkdev && (m->flags & PG_NOTMETA))
468 			break;
469 		vm_page_io_start(m);
470 		vm_page_protect(m, VM_PROT_READ);
471 		if (m->queue - m->pc == PQ_CACHE) {
472 			vm_page_unqueue_nowakeup(m);
473 			vm_page_deactivate(m);
474 		}
475 		marray[i] = m;
476 	}
477 	++i;
478 
479 	for (j = x + 1; j < SWAP_META_PAGES; ++j) {
480 		m = vm_page_lookup(object, basei - x + j);
481 		if (m == NULL)
482 			break;
483 		if (vm_swapcache_test(m))
484 			break;
485 		if (isblkdev && (m->flags & PG_NOTMETA))
486 			break;
487 		vm_page_io_start(m);
488 		vm_page_protect(m, VM_PROT_READ);
489 		if (m->queue - m->pc == PQ_CACHE) {
490 			vm_page_unqueue_nowakeup(m);
491 			vm_page_deactivate(m);
492 		}
493 		marray[j] = m;
494 	}
495 
496 	count = j - i;
497 	vm_object_pip_add(object, count);
498 	swap_pager_putpages(object, marray + i, count, FALSE, rtvals + i);
499 	vm_swapcache_write_count += count * PAGE_SIZE;
500 	vm_swapcache_curburst -= count * PAGE_SIZE;
501 
502 	while (i < j) {
503 		if (rtvals[i] != VM_PAGER_PEND) {
504 			vm_page_io_finish(marray[i]);
505 			vm_object_pip_wakeup(object);
506 		}
507 		++i;
508 	}
509 	return(count);
510 }
511 
512 /*
513  * Test whether a VM page is suitable for writing to the swapcache.
514  * Does not test m->queue, PG_MARKER, or PG_SWAPPED.
515  *
516  * Returns 0 on success, 1 on failure
517  *
518  * The caller must hold vm_token.
519  */
520 static int
521 vm_swapcache_test(vm_page_t m)
522 {
523 	vm_object_t object;
524 
525 	if (m->flags & (PG_BUSY | PG_UNMANAGED))
526 		return(1);
527 	if (m->busy || m->hold_count || m->wire_count)
528 		return(1);
529 	if (m->valid != VM_PAGE_BITS_ALL)
530 		return(1);
531 	if (m->dirty & m->valid)
532 		return(1);
533 	if ((object = m->object) == NULL)
534 		return(1);
535 	if (object->type != OBJT_VNODE ||
536 	    (object->flags & OBJ_DEAD)) {
537 		return(1);
538 	}
539 	vm_page_test_dirty(m);
540 	if (m->dirty & m->valid)
541 		return(1);
542 	return(0);
543 }
544 
545 /*
546  * Cleaning pass
547  *
548  * The caller must hold vm_token.
549  */
550 static
551 void
552 vm_swapcache_cleaning(vm_object_t marker)
553 {
554 	vm_object_t object;
555 	struct vnode *vp;
556 	int count;
557 	int n;
558 
559 	object = marker;
560 	count = vm_swapcache_maxlaunder;
561 
562 	/*
563 	 * Look for vnode objects
564 	 */
565 	lwkt_gettoken(&vm_token);
566 	lwkt_gettoken(&vmobj_token);
567 
568 	while ((object = TAILQ_NEXT(object, object_list)) != NULL) {
569 		if (--count <= 0)
570 			break;
571 
572 		vm_object_hold(object);
573 
574 		/*
575 		 * Only operate on live VNODE objects with regular/chardev types
576 		 */
577 		if ((object->type != OBJT_VNODE) ||
578 		    ((object->flags & OBJ_DEAD) || object->swblock_count == 0) ||
579 		    ((vp = object->handle) == NULL) ||
580 		    (vp->v_type != VREG && vp->v_type != VCHR)) {
581 			vm_object_drop(object);
582 			continue;
583 		}
584 
585 		/*
586 		 * Adjust iterator.
587 		 */
588 		if (marker->backing_object != object)
589 			marker->size = 0;
590 
591 		/*
592 		 * Move the marker so we can work on the VM object
593 		 */
594 		TAILQ_REMOVE(&vm_object_list, marker, object_list);
595 		TAILQ_INSERT_AFTER(&vm_object_list, object,
596 				   marker, object_list);
597 
598 		/*
599 		 * Look for swblocks starting at our iterator.
600 		 *
601 		 * The swap_pager_condfree() function attempts to free
602 		 * swap space starting at the specified index.  The index
603 		 * will be updated on return.  The function will return
604 		 * a scan factor (NOT the number of blocks freed).
605 		 *
606 		 * If it must cut its scan of the object short due to an
607 		 * excessive number of swblocks, or is able to free the
608 		 * requested number of blocks, it will return n >= count
609 		 * and we break and pick it back up on a future attempt.
610 		 */
611 		n = swap_pager_condfree(object, &marker->size, count);
612 
613 		vm_object_drop(object);
614 
615 		count -= n;
616 		if (count < 0)
617 			break;
618 
619 		/*
620 		 * Setup for loop.
621 		 */
622 		marker->size = 0;
623 		object = marker;
624 	}
625 
626 	/*
627 	 * Adjust marker so we continue the scan from where we left off.
628 	 * When we reach the end we start back at the beginning.
629 	 */
630 	TAILQ_REMOVE(&vm_object_list, marker, object_list);
631 	if (object)
632 		TAILQ_INSERT_BEFORE(object, marker, object_list);
633 	else
634 		TAILQ_INSERT_HEAD(&vm_object_list, marker, object_list);
635 	marker->backing_object = object;
636 
637 	lwkt_reltoken(&vmobj_token);
638 	lwkt_reltoken(&vm_token);
639 }
640