/*	$NetBSD: uvm_loan.c,v 1.69 2007/12/01 10:40:28 yamt Exp $	*/

/*
 *
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by Charles D. Cranor and
 *      Washington University.
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * from: Id: uvm_loan.c,v 1.1.6.4 1998/02/06 05:08:43 chs Exp
 */

/*
 * uvm_loan.c: page loanout handler
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_loan.c,v 1.69 2007/12/01 10:40:28 yamt Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/mman.h>

#include <uvm/uvm.h>

/*
 * "loaned" pages are pages which are (read-only, copy-on-write) loaned
 * from the VM system to other parts of the kernel.   this allows page
 * copying to be avoided (e.g. you can loan pages from objs/anons to
 * the mbuf system).
 *
 * there are 3 types of loans possible:
 *  O->K  uvm_object page to wired kernel page (e.g. mbuf data area)
 *  A->K  anon page to wired kernel page (e.g. mbuf data area)
 *  O->A  uvm_object to anon loan (e.g. vnode page to an anon)
 * note that it is possible to have an O page loaned to both an A and K
 * at the same time.
 *
 * loans are tracked by pg->loan_count.  an O->A page will have both
 * a uvm_object and a vm_anon, but PQ_ANON will not be set.   this sort
 * of page is considered "owned" by the uvm_object (not the anon).
 *
 * each loan of a page to the kernel bumps the pg->wire_count.  the
 * kernel mappings for these pages will be read-only and wired.  since
 * the page will also be wired, it will not be a candidate for pageout,
 * and thus will never be pmap_page_protect()'d with VM_PROT_NONE.  a
 * write fault in the kernel to one of these pages will not cause
 * copy-on-write.  instead, the page fault is considered fatal.  this
 * is because the kernel mapping will have no way to look up the
 * object/anon which the page is owned by.  this is a good side-effect,
 * since a kernel write to a loaned page is an error.
 *
 * owners that want to free their pages and discover that they are
 * loaned out simply "disown" them (the page becomes an orphan).  these
 * pages should be freed when the last loan is dropped.   in some cases
 * an anon may "adopt" an orphaned page.
 *
 * locking: to read pg->loan_count either the owner or the page queues
 * must be locked.   to modify pg->loan_count, both the owner of the page
 * and the PQs must be locked.   pg->flags is (as always) locked by
 * the owner of the page.
 *
 * note that locking from the "loaned" side is tricky since the object
 * getting the loaned page has no reference to the page's owner and thus
 * the owner could "die" at any time.   in order to prevent the owner
 * from dying the page queues should be locked.   this forces us to sometimes
 * use "try" locking.
 *
 * loans are typically broken by the following events:
 *  1. user-level write fault to a loaned page
 *  2. pageout of clean+inactive O->A loaned page
 *  3. owner frees page (e.g. pager flush)
 *
 * note that loaning a page causes all mappings of the page to become
 * read-only (via pmap_page_protect).   this could have an unexpected
 * effect on normal "wired" pages if one is not careful (XXX).
 */
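
/*
 * illustrative sketch: a typical O->K loan as seen from a hypothetical
 * kernel caller.  only uvm_loan(), uvm_unloan() and the UVM_LOAN_*
 * flags are real; the surrounding names (start, npages, pgs, MAXPAGES)
 * are made up for exposition.
 *
 *	struct vm_page *pgs[MAXPAGES];
 *	int error;
 *
 *	error = uvm_loan(&curproc->p_vmspace->vm_map, start,
 *	    npages << PAGE_SHIFT, pgs, UVM_LOAN_TOPAGE);
 *	if (error)
 *		return error;
 *	... read via the loaned, now read-only pages ...
 *	uvm_unloan(pgs, npages, UVM_LOAN_TOPAGE);
 */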

/*
 * local prototypes
 */

static int	uvm_loananon(struct uvm_faultinfo *, void ***,
			     int, struct vm_anon *);
static int	uvm_loanuobj(struct uvm_faultinfo *, void ***,
			     int, vaddr_t);
static int	uvm_loanzero(struct uvm_faultinfo *, void ***, int);
static void	uvm_unloananon(struct vm_anon **, int);
static void	uvm_unloanpage(struct vm_page **, int);
static int	uvm_loanpage(struct vm_page **, int);


/*
 * inlines
 */

/*
 * uvm_loanentry: loan out pages in a map entry (helper fn for uvm_loan())
 *
 * => "ufi" is the result of a successful map lookup (meaning that
 *	on entry the map is locked by the caller)
 * => we may unlock and then relock the map if needed (for I/O)
 * => we put our output result in "output"
 * => we always return with the map unlocked
 * => possible return values:
 *	-1 == error, map is unlocked
 *	 0 == map relock error (try again!), map is unlocked
 *	>0 == number of pages we loaned, map is unlocked
 *
 * NOTE: We can live with this being an inline, because it is only called
 * from one place.
 */

static inline int
uvm_loanentry(struct uvm_faultinfo *ufi, void ***output, int flags)
{
	vaddr_t curaddr = ufi->orig_rvaddr;
	vsize_t togo = ufi->size;
	struct vm_aref *aref = &ufi->entry->aref;
	struct uvm_object *uobj = ufi->entry->object.uvm_obj;
	struct vm_anon *anon;
	int rv, result = 0;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	/*
	 * lock us the rest of the way down (we unlock before return)
	 */
	if (aref->ar_amap)
		amap_lock(aref->ar_amap);

	/*
	 * loop until done
	 */
	while (togo) {

		/*
		 * find the page we want.   check the anon layer first.
		 */

		if (aref->ar_amap) {
			anon = amap_lookup(aref, curaddr - ufi->entry->start);
		} else {
			anon = NULL;
		}

		/* locked: map, amap, uobj */
		if (anon) {
			rv = uvm_loananon(ufi, output, flags, anon);
		} else if (uobj) {
			rv = uvm_loanuobj(ufi, output, flags, curaddr);
		} else if (UVM_ET_ISCOPYONWRITE(ufi->entry)) {
			rv = uvm_loanzero(ufi, output, flags);
		} else {
			uvmfault_unlockall(ufi, aref->ar_amap, uobj, NULL);
			rv = -1;
		}
		/* locked: if (rv > 0) => map, amap, uobj  [o.w. unlocked] */
		KASSERT(rv > 0 || aref->ar_amap == NULL ||
		    !mutex_owned(&aref->ar_amap->am_l));

		/* total failure */
		if (rv < 0) {
			UVMHIST_LOG(loanhist, "failure %d", rv, 0,0,0);
			return (-1);
		}

		/* relock failed, need to do another lookup */
		if (rv == 0) {
			UVMHIST_LOG(loanhist, "relock failure %d", result
			    ,0,0,0);
			return (result);
		}

		/*
		 * got it... advance to next page
		 */

		result++;
		togo -= PAGE_SIZE;
		curaddr += PAGE_SIZE;
	}

	/*
	 * unlock what we locked, unlock the maps and return
	 */

	if (aref->ar_amap)
		amap_unlock(aref->ar_amap);
	uvmfault_unlockmaps(ufi, false);
	UVMHIST_LOG(loanhist, "done %d", result, 0,0,0);
	return (result);
}

/*
 * normal functions
 */

/*
 * uvm_loan: loan pages in a map out to anons or to the kernel
 *
 * => map should be unlocked
 * => start and len should be multiples of PAGE_SIZE
 * => result is either an array of anons or vm_pages (depending on flags)
 * => flag values: UVM_LOAN_TOANON - loan to anons
 *                 UVM_LOAN_TOPAGE - loan to wired kernel page
 *    one and only one of these flags must be set!
 * => returns 0 (success), or an appropriate error number
 */

int
uvm_loan(struct vm_map *map, vaddr_t start, vsize_t len, void *v, int flags)
{
	struct uvm_faultinfo ufi;
	void **result, **output;
	int rv, error;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	/*
	 * ensure that one and only one of the flags is set
	 */

	KASSERT(((flags & UVM_LOAN_TOANON) == 0) ^
		((flags & UVM_LOAN_TOPAGE) == 0));
	KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);

	/*
	 * "output" is a pointer to the current place to put the loaned page.
	 */

	result = v;
	output = &result[0];	/* start at the beginning ... */

	/*
	 * while we've got pages to do
	 */

	while (len > 0) {

		/*
		 * fill in params for a call to uvmfault_lookup
		 */

		ufi.orig_map = map;
		ufi.orig_rvaddr = start;
		ufi.orig_size = len;

		/*
		 * do the lookup, the only time this will fail is if we hit on
		 * an unmapped region (an error)
		 */

		if (!uvmfault_lookup(&ufi, false)) {
			error = ENOENT;
			goto fail;
		}

		/*
		 * map now locked.  now do the loanout...
		 */

		rv = uvm_loanentry(&ufi, &output, flags);
		if (rv < 0) {
			/* all unlocked due to error */
			error = EINVAL;
			goto fail;
		}

		/*
		 * done!  the map is unlocked.  advance, if possible.
		 *
		 * XXXCDC: could be recoded to hold the map lock with
		 *	   smarter code (but it only happens on map entry
		 *	   boundaries, so it isn't that bad).
		 */

		if (rv) {
			rv <<= PAGE_SHIFT;
			len -= rv;
			start += rv;
		}
	}
	UVMHIST_LOG(loanhist, "success", 0,0,0,0);
	return 0;

fail:
	/*
	 * failed to complete loans.  drop any loans and return failure code.
	 * map is already unlocked.
	 */

	if (output - result) {
		if (flags & UVM_LOAN_TOANON) {
			uvm_unloananon((struct vm_anon **)result,
			    output - result);
		} else {
			uvm_unloanpage((struct vm_page **)result,
			    output - result);
		}
	}
	UVMHIST_LOG(loanhist, "error %d", error,0,0,0);
	return (error);
}
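
/*
 * illustrative sketch: the UVM_LOAN_TOANON variant of the call above.
 * with this flag the output array receives anon pointers rather than
 * page pointers, and the loan is again dropped with uvm_unloan().
 * hypothetical caller code, for exposition only.
 *
 *	struct vm_anon *anons[MAXPAGES];
 *
 *	error = uvm_loan(map, start, npages << PAGE_SHIFT, anons,
 *	    UVM_LOAN_TOANON);
 *	if (error == 0) {
 *		... use the anons; each holds a reference taken for us ...
 *		uvm_unloan(anons, npages, UVM_LOAN_TOANON);
 *	}
 */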

/*
 * uvm_loananon: loan a page from an anon out
 *
 * => called with map, amap, uobj locked
 * => return value:
 *	-1 = fatal error, everything is unlocked, abort.
 *	 0 = lookup in ufi went stale, everything unlocked, relookup and
 *		try again
 *	 1 = got it, everything still locked
 */

int
uvm_loananon(struct uvm_faultinfo *ufi, void ***output, int flags,
    struct vm_anon *anon)
{
	struct vm_page *pg;
	int error;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	/*
	 * if we are loaning to "another" anon then it is easy, we just
	 * bump the reference count on the current anon and return a
	 * pointer to it (it becomes copy-on-write shared).
	 */

	if (flags & UVM_LOAN_TOANON) {
		simple_lock(&anon->an_lock);
		pg = anon->an_page;
		if (pg && (pg->pqflags & PQ_ANON) != 0 && anon->an_ref == 1) {
			if (pg->wire_count > 0) {
				UVMHIST_LOG(loanhist, "->A wired %p", pg,0,0,0);
				uvmfault_unlockall(ufi,
				    ufi->entry->aref.ar_amap,
				    ufi->entry->object.uvm_obj, anon);
				return (-1);
			}
			pmap_page_protect(pg, VM_PROT_READ);
		}
		anon->an_ref++;
		**output = anon;
		(*output)++;
		simple_unlock(&anon->an_lock);
		UVMHIST_LOG(loanhist, "->A done", 0,0,0,0);
		return (1);
	}

	/*
	 * we are loaning to a kernel-page.   we need to get the page
	 * resident so we can wire it.   uvmfault_anonget will handle
	 * this for us.
	 */

	simple_lock(&anon->an_lock);
	error = uvmfault_anonget(ufi, ufi->entry->aref.ar_amap, anon);

	/*
	 * if we were unable to get the anon, then uvmfault_anonget has
	 * unlocked everything and returned an error code.
	 */

	if (error) {
		UVMHIST_LOG(loanhist, "error %d", error,0,0,0);

		/* need to refault (i.e. refresh our lookup) ? */
		if (error == ERESTART) {
			return (0);
		}

		/* "try again"?   sleep a bit and retry ... */
		if (error == EAGAIN) {
			tsleep(&lbolt, PVM, "loanagain", 0);
			return (0);
		}

		/* otherwise flag it as an error */
		return (-1);
	}

	/*
	 * we have the page and its owner locked: do the loan now.
	 */

	pg = anon->an_page;
	uvm_lock_pageq();
	if (pg->wire_count > 0) {
		uvm_unlock_pageq();
		UVMHIST_LOG(loanhist, "->K wired %p", pg,0,0,0);
		KASSERT(pg->uobject == NULL);
		uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap,
		    NULL, anon);
		return (-1);
	}
	if (pg->loan_count == 0) {
		pmap_page_protect(pg, VM_PROT_READ);
	}
	pg->loan_count++;
	uvm_pageactivate(pg);
	uvm_unlock_pageq();
	**output = pg;
	(*output)++;

	/* unlock anon and return success */
	if (pg->uobject)
		simple_unlock(&pg->uobject->vmobjlock);
	simple_unlock(&anon->an_lock);
	UVMHIST_LOG(loanhist, "->K done", 0,0,0,0);
	return (1);
}

/*
 * uvm_loanpage: loan out pages to kernel (->K)
 *
 * => pages should be object-owned and the object should be locked.
 * => in the case of error, the object might be unlocked and relocked.
 * => caller should busy the pages beforehand.
 * => pages will be unbusied.
 * => fail with EBUSY if we meet a wired page.
 */
static int
uvm_loanpage(struct vm_page **pgpp, int npages)
{
	int i;
	int error = 0;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	for (i = 0; i < npages; i++) {
		struct vm_page *pg = pgpp[i];

		KASSERT(pg->uobject != NULL);
		KASSERT(pg->uobject == pgpp[0]->uobject);
		KASSERT(!(pg->flags & (PG_RELEASED|PG_PAGEOUT)));
		LOCK_ASSERT(simple_lock_held(&pg->uobject->vmobjlock));
		KASSERT(pg->flags & PG_BUSY);

		uvm_lock_pageq();
		if (pg->wire_count > 0) {
			uvm_unlock_pageq();
			UVMHIST_LOG(loanhist, "wired %p", pg,0,0,0);
			error = EBUSY;
			break;
		}
		if (pg->loan_count == 0) {
			pmap_page_protect(pg, VM_PROT_READ);
		}
		pg->loan_count++;
		uvm_pageactivate(pg);
		uvm_unlock_pageq();
	}

	uvm_page_unbusy(pgpp, npages);

	if (error) {
		/*
		 * backout what we've done
		 */
		struct simplelock *slock = &pgpp[0]->uobject->vmobjlock;

		simple_unlock(slock);
		uvm_unloan(pgpp, i, UVM_LOAN_TOPAGE);
		simple_lock(slock);
	}

	UVMHIST_LOG(loanhist, "done %d", error,0,0,0);
	return error;
}

/*
 * XXX UBC temp limit
 * number of pages to get at once.
 * should be <= MAX_READ_AHEAD in genfs_vnops.c
 */
#define	UVM_LOAN_GET_CHUNK	16

/*
 * uvm_loanuobjpages: loan pages from a uobj out (O->K)
 *
 * => uobj shouldn't be locked.  (we'll lock it)
 * => fail with EBUSY if we meet a wired page.
 */
int
uvm_loanuobjpages(struct uvm_object *uobj, voff_t pgoff, int orignpages,
    struct vm_page **origpgpp)
{
	int ndone; /* # of pages loaned out */
	struct vm_page **pgpp;
	int error;
	int i;
	struct simplelock *slock;

	pgpp = origpgpp;
	for (ndone = 0; ndone < orignpages; ) {
		int npages;
		/* npendloan: # of pages busied but not loaned out yet. */
		int npendloan = 0xdead; /* XXX gcc */
reget:
		npages = MIN(UVM_LOAN_GET_CHUNK, orignpages - ndone);
		simple_lock(&uobj->vmobjlock);
		error = (*uobj->pgops->pgo_get)(uobj,
		    pgoff + (ndone << PAGE_SHIFT), pgpp, &npages, 0,
		    VM_PROT_READ, 0, PGO_SYNCIO);
		if (error == EAGAIN) {
			tsleep(&lbolt, PVM, "nfsread", 0);
			continue;
		}
		if (error)
			goto fail;

		KASSERT(npages > 0);

		/* loan and unbusy pages */
		slock = NULL;
		for (i = 0; i < npages; i++) {
			struct simplelock *nextslock; /* slock for next page */
			struct vm_page *pg = *pgpp;

			/* XXX assuming that the page is owned by uobj */
			KASSERT(pg->uobject != NULL);
			nextslock = &pg->uobject->vmobjlock;

			if (slock != nextslock) {
				if (slock) {
					KASSERT(npendloan > 0);
					error = uvm_loanpage(pgpp - npendloan,
					    npendloan);
					simple_unlock(slock);
					if (error)
						goto fail;
					ndone += npendloan;
					KASSERT(origpgpp + ndone == pgpp);
				}
				slock = nextslock;
				npendloan = 0;
				simple_lock(slock);
			}

			if ((pg->flags & PG_RELEASED) != 0) {
				/*
				 * release pages and try again.
				 */
				simple_unlock(slock);
				for (; i < npages; i++) {
					pg = pgpp[i];
					slock = &pg->uobject->vmobjlock;

					simple_lock(slock);
					uvm_lock_pageq();
					uvm_page_unbusy(&pg, 1);
					uvm_unlock_pageq();
					simple_unlock(slock);
				}
				goto reget;
			}

			npendloan++;
			pgpp++;
			KASSERT(origpgpp + ndone + npendloan == pgpp);
		}
		KASSERT(slock != NULL);
		KASSERT(npendloan > 0);
		error = uvm_loanpage(pgpp - npendloan, npendloan);
		simple_unlock(slock);
		if (error)
			goto fail;
		ndone += npendloan;
		KASSERT(origpgpp + ndone == pgpp);
	}

	return 0;

fail:
	uvm_unloan(origpgpp, ndone, UVM_LOAN_TOPAGE);

	return error;
}
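
/*
 * illustrative sketch: loaning resident vnode pages for zero-copy i/o
 * with the function above.  "vp", "offset" and "npages" are
 * hypothetical; uvm_loanuobjpages() and uvm_unloan() are real.
 *
 *	struct uvm_object *uobj = &vp->v_uobj;
 *	struct vm_page *pgs[MAXPAGES];
 *
 *	error = uvm_loanuobjpages(uobj, offset, npages, pgs);
 *	if (error == 0) {
 *		... map or transmit the pages read-only ...
 *		uvm_unloan(pgs, npages, UVM_LOAN_TOPAGE);
 *	}
 */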

/*
 * uvm_loanuobj: loan a page from a uobj out
 *
 * => called with map, amap, uobj locked
 * => return value:
 *	-1 = fatal error, everything is unlocked, abort.
 *	 0 = lookup in ufi went stale, everything unlocked, relookup and
 *		try again
 *	 1 = got it, everything still locked
 */

static int
uvm_loanuobj(struct uvm_faultinfo *ufi, void ***output, int flags, vaddr_t va)
{
	struct vm_amap *amap = ufi->entry->aref.ar_amap;
	struct uvm_object *uobj = ufi->entry->object.uvm_obj;
	struct vm_page *pg;
	struct vm_anon *anon;
	int error, npages;
	bool locked;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	/*
	 * first we must make sure the page is resident.
	 *
	 * XXXCDC: duplicate code with uvm_fault().
	 */

	simple_lock(&uobj->vmobjlock);
	if (uobj->pgops->pgo_get) {	/* try locked pgo_get */
		npages = 1;
		pg = NULL;
		error = (*uobj->pgops->pgo_get)(uobj,
		    va - ufi->entry->start + ufi->entry->offset,
		    &pg, &npages, 0, VM_PROT_READ, MADV_NORMAL, PGO_LOCKED);
	} else {
		error = EIO;		/* must have pgo_get op */
	}

	/*
	 * check the result of the locked pgo_get.  if there is a problem,
	 * then we fail the loan.
	 */

	if (error && error != EBUSY) {
		uvmfault_unlockall(ufi, amap, uobj, NULL);
		return (-1);
	}

	/*
	 * if we need to unlock for I/O, do so now.
	 */

	if (error == EBUSY) {
		uvmfault_unlockall(ufi, amap, NULL, NULL);

		/* locked: uobj */
		npages = 1;
		error = (*uobj->pgops->pgo_get)(uobj,
		    va - ufi->entry->start + ufi->entry->offset,
		    &pg, &npages, 0, VM_PROT_READ, MADV_NORMAL, PGO_SYNCIO);
		/* locked: <nothing> */

		if (error) {
			if (error == EAGAIN) {
				tsleep(&lbolt, PVM, "fltagain2", 0);
				return (0);
			}
			return (-1);
		}

		/*
		 * pgo_get was a success.   attempt to relock everything.
		 */

		locked = uvmfault_relock(ufi);
		if (locked && amap)
			amap_lock(amap);
		uobj = pg->uobject;
		simple_lock(&uobj->vmobjlock);

		/*
		 * verify that the page has not been released and re-verify
		 * that the amap slot is still free.   if there is a problem,
		 * we drop our lock (thus forcing a lookup refresh/retry).
		 */

		if ((pg->flags & PG_RELEASED) != 0 ||
		    (locked && amap && amap_lookup(&ufi->entry->aref,
		    ufi->orig_rvaddr - ufi->entry->start))) {
			if (locked)
				uvmfault_unlockall(ufi, amap, NULL, NULL);
			locked = false;
		}

		/*
		 * didn't get the lock?   release the page and retry.
		 */

		if (locked == false) {
			if (pg->flags & PG_WANTED) {
				wakeup(pg);
			}
			if (pg->flags & PG_RELEASED) {
				uvm_lock_pageq();
				uvm_pagefree(pg);
				uvm_unlock_pageq();
				simple_unlock(&uobj->vmobjlock);
				return (0);
			}
			uvm_lock_pageq();
			uvm_pageactivate(pg);
			uvm_unlock_pageq();
			pg->flags &= ~(PG_BUSY|PG_WANTED);
			UVM_PAGE_OWN(pg, NULL);
			simple_unlock(&uobj->vmobjlock);
			return (0);
		}
	}

	KASSERT(uobj == pg->uobject);

	/*
	 * at this point we have the page we want ("pg") marked PG_BUSY for us
	 * and we have all data structures locked.  do the loanout.  page can
	 * not be PG_RELEASED (we caught this above).
	 */

	if ((flags & UVM_LOAN_TOANON) == 0) {
		if (uvm_loanpage(&pg, 1)) {
			uvmfault_unlockall(ufi, amap, uobj, NULL);
			return (-1);
		}
		simple_unlock(&uobj->vmobjlock);
		**output = pg;
		(*output)++;
		return (1);
	}

	/*
	 * must be a loan to an anon.   check to see if there is already
	 * an anon associated with this page.  if so, then just return
	 * a reference to it.   the page should already be
	 * mapped read-only because it is already on loan.
	 */

	if (pg->uanon) {
		anon = pg->uanon;
		simple_lock(&anon->an_lock);
		anon->an_ref++;
		simple_unlock(&anon->an_lock);
		if (pg->flags & PG_WANTED) {
			wakeup(pg);
		}
		pg->flags &= ~(PG_WANTED|PG_BUSY);
		UVM_PAGE_OWN(pg, NULL);
		simple_unlock(&uobj->vmobjlock);
		**output = anon;
		(*output)++;
		return (1);
	}

	/*
	 * need to allocate a new anon
	 */

	anon = uvm_analloc();
	if (anon == NULL) {
		goto fail;
	}
	anon->an_page = pg;
	pg->uanon = anon;
	uvm_lock_pageq();
	if (pg->wire_count > 0) {
		uvm_unlock_pageq();
		UVMHIST_LOG(loanhist, "wired %p", pg,0,0,0);
		pg->uanon = NULL;
		anon->an_page = NULL;
		anon->an_ref--;
		simple_unlock(&anon->an_lock);
		uvm_anfree(anon);
		goto fail;
	}
	if (pg->loan_count == 0) {
		pmap_page_protect(pg, VM_PROT_READ);
	}
	pg->loan_count++;
	uvm_pageactivate(pg);
	uvm_unlock_pageq();
	if (pg->flags & PG_WANTED) {
		wakeup(pg);
	}
	pg->flags &= ~(PG_WANTED|PG_BUSY);
	UVM_PAGE_OWN(pg, NULL);
	simple_unlock(&uobj->vmobjlock);
	simple_unlock(&anon->an_lock);
	**output = anon;
	(*output)++;
	return (1);

fail:
	UVMHIST_LOG(loanhist, "fail", 0,0,0,0);
	/*
	 * unlock everything and bail out.
	 */
	if (pg->flags & PG_WANTED) {
		wakeup(pg);
	}
	pg->flags &= ~(PG_WANTED|PG_BUSY);
	UVM_PAGE_OWN(pg, NULL);
	uvmfault_unlockall(ufi, amap, uobj, NULL);
	return (-1);
}

/*
 * uvm_loanzero: loan a zero-fill page out
 *
 * => called with map, amap, uobj locked
 * => return value:
 *	-1 = fatal error, everything is unlocked, abort.
 *	 0 = lookup in ufi went stale, everything unlocked, relookup and
 *		try again
 *	 1 = got it, everything still locked
 */

static struct uvm_object uvm_loanzero_object;

static int
uvm_loanzero(struct uvm_faultinfo *ufi, void ***output, int flags)
{
	struct vm_anon *anon;
	struct vm_page *pg;
	struct vm_amap *amap = ufi->entry->aref.ar_amap;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);
again:
	simple_lock(&uvm_loanzero_object.vmobjlock);

	/*
	 * first, get ahold of our single zero page.
	 */

	if (__predict_false((pg =
			     TAILQ_FIRST(&uvm_loanzero_object.memq)) == NULL)) {
		while ((pg = uvm_pagealloc(&uvm_loanzero_object, 0, NULL,
					   UVM_PGA_ZERO)) == NULL) {
			simple_unlock(&uvm_loanzero_object.vmobjlock);
			uvmfault_unlockall(ufi, amap, NULL, NULL);
			uvm_wait("loanzero");
			if (!uvmfault_relock(ufi)) {
				return (0);
			}
			if (amap) {
				amap_lock(amap);
			}
			goto again;
		}

		/* got a zero'd page. */
		pg->flags &= ~(PG_WANTED|PG_BUSY|PG_FAKE);
		pg->flags |= PG_RDONLY;
		uvm_lock_pageq();
		uvm_pageactivate(pg);
		uvm_unlock_pageq();
		UVM_PAGE_OWN(pg, NULL);
	}

	if ((flags & UVM_LOAN_TOANON) == 0) {	/* loaning to kernel-page */
		uvm_lock_pageq();
		pg->loan_count++;
		uvm_unlock_pageq();
		simple_unlock(&uvm_loanzero_object.vmobjlock);
		**output = pg;
		(*output)++;
		return (1);
	}

	/*
	 * loaning to an anon.  check to see if there is already an anon
	 * associated with this page.  if so, then just return a reference
	 * to it.
	 */

	if (pg->uanon) {
		anon = pg->uanon;
		simple_lock(&anon->an_lock);
		anon->an_ref++;
		simple_unlock(&anon->an_lock);
		simple_unlock(&uvm_loanzero_object.vmobjlock);
		**output = anon;
		(*output)++;
		return (1);
	}

	/*
	 * need to allocate a new anon
	 */

	anon = uvm_analloc();
	if (anon == NULL) {
		/* out of swap causes us to fail */
		simple_unlock(&uvm_loanzero_object.vmobjlock);
		uvmfault_unlockall(ufi, amap, NULL, NULL);
		return (-1);
	}
	anon->an_page = pg;
	pg->uanon = anon;
	uvm_lock_pageq();
	pg->loan_count++;
	uvm_pageactivate(pg);
	uvm_unlock_pageq();
	simple_unlock(&anon->an_lock);
	simple_unlock(&uvm_loanzero_object.vmobjlock);
	**output = anon;
	(*output)++;
	return (1);
}


/*
 * uvm_unloananon: kill loans on anons (basically a normal ref drop)
 *
 * => we expect all our resources to be unlocked
 */

static void
uvm_unloananon(struct vm_anon **aloans, int nanons)
{
	struct vm_anon *anon;

	while (nanons-- > 0) {
		int refs;

		anon = *aloans++;
		simple_lock(&anon->an_lock);
		refs = --anon->an_ref;
		simple_unlock(&anon->an_lock);

		if (refs == 0) {
			uvm_anfree(anon);
		}
	}
}

/*
 * uvm_unloanpage: kill loans on pages loaned out to the kernel
 *
 * => we expect all our resources to be unlocked
 */

static void
uvm_unloanpage(struct vm_page **ploans, int npages)
{
	struct vm_page *pg;
	struct simplelock *slock;

	uvm_lock_pageq();
	while (npages-- > 0) {
		pg = *ploans++;

		/*
		 * do a little dance to acquire the object or anon lock
		 * as appropriate.  we are locking in the wrong order,
		 * so we have to do a try-lock here.
		 */

		slock = NULL;
		while (pg->uobject != NULL || pg->uanon != NULL) {
			if (pg->uobject != NULL) {
				slock = &pg->uobject->vmobjlock;
			} else {
				slock = &pg->uanon->an_lock;
			}
			if (simple_lock_try(slock)) {
				break;
			}
			uvm_unlock_pageq();
			uvm_lock_pageq();
			slock = NULL;
		}

		/*
		 * drop our loan.  if page is owned by an anon but
		 * PQ_ANON is not set, the page was loaned to the anon
		 * from an object which dropped ownership, so resolve
		 * this by turning the anon's loan into real ownership
		 * (ie. decrement loan_count again and set PQ_ANON).
		 * after all this, if there are no loans left, put the
		 * page back on a paging queue (if the page is owned by
		 * an anon) or free it (if the page is now unowned).
		 */

		KASSERT(pg->loan_count > 0);
		pg->loan_count--;
		if (pg->uobject == NULL && pg->uanon != NULL &&
		    (pg->pqflags & PQ_ANON) == 0) {
			KASSERT(pg->loan_count > 0);
			pg->loan_count--;
			pg->pqflags |= PQ_ANON;
		}
		if (pg->loan_count == 0 && pg->uobject == NULL &&
		    pg->uanon == NULL) {
			KASSERT((pg->flags & PG_BUSY) == 0);
			uvm_pagefree(pg);
		}
		if (slock != NULL) {
			simple_unlock(slock);
		}
	}
	uvm_unlock_pageq();
}

/*
 * uvm_unloan: kill loans on pages or anons.
 */

void
uvm_unloan(void *v, int npages, int flags)
{
	if (flags & UVM_LOAN_TOANON) {
		uvm_unloananon(v, npages);
	} else {
		uvm_unloanpage(v, npages);
	}
}

/*
 * Minimal pager for uvm_loanzero_object.  We need to provide a "put"
 * method, because the page can end up on a paging queue, and the
 * page daemon will want to call pgo_put when it encounters the page
 * on the inactive list.
 */

static int
ulz_put(struct uvm_object *uobj, voff_t start, voff_t stop, int flags)
{
	struct vm_page *pg;

	KDASSERT(uobj == &uvm_loanzero_object);

	/*
	 * Don't need to do any work here if we're not freeing pages.
	 */

	if ((flags & PGO_FREE) == 0) {
		simple_unlock(&uobj->vmobjlock);
		return 0;
	}

	/*
	 * we don't actually want to ever free the uvm_loanzero_page, so
	 * just reactivate or dequeue it.
	 */

	pg = TAILQ_FIRST(&uobj->memq);
	KASSERT(pg != NULL);
	KASSERT(TAILQ_NEXT(pg, listq) == NULL);

	uvm_lock_pageq();
	if (pg->uanon)
		uvm_pageactivate(pg);
	else
		uvm_pagedequeue(pg);
	uvm_unlock_pageq();

	simple_unlock(&uobj->vmobjlock);
	return 0;
}

static const struct uvm_pagerops ulz_pager = {
	.pgo_put = ulz_put,
};

/*
 * uvm_loan_init(): initialize the uvm_loan() facility.
 */

void
uvm_loan_init(void)
{

	simple_lock_init(&uvm_loanzero_object.vmobjlock);
	TAILQ_INIT(&uvm_loanzero_object.memq);
	uvm_loanzero_object.pgops = &ulz_pager;

	UVMHIST_INIT(loanhist, 300);
}

/*
 * uvm_loanbreak: break loan on a uobj page
 *
 * => called with uobj locked
 * => the page should be busy
 * => return value:
 *	newly allocated page if succeeded
 */
struct vm_page *
uvm_loanbreak(struct vm_page *uobjpage)
{
	struct vm_page *pg;
#ifdef DIAGNOSTIC
	struct uvm_object *uobj = uobjpage->uobject;
#endif

	KASSERT(uobj != NULL);
	LOCK_ASSERT(simple_lock_held(&uobj->vmobjlock));
	KASSERT(uobjpage->flags & PG_BUSY);

	/* alloc new un-owned page */
	pg = uvm_pagealloc(NULL, 0, NULL, 0);
	if (pg == NULL)
		return NULL;

	/*
	 * copy the data from the old page to the new
	 * one and clear the fake flags on the new page (keep it busy).
	 * force a reload of the old page by clearing it from all
	 * pmaps.
	 * transfer dirtiness of the old page to the new page.
	 * then lock the page queues to rename the pages.
	 */

	uvm_pagecopy(uobjpage, pg);	/* old -> new */
	pg->flags &= ~PG_FAKE;
	pmap_page_protect(uobjpage, VM_PROT_NONE);
	if ((uobjpage->flags & PG_CLEAN) != 0 && !pmap_clear_modify(uobjpage)) {
		pmap_clear_modify(pg);
		pg->flags |= PG_CLEAN;
	} else {
		/* uvm_pagecopy marked it dirty */
		KASSERT((pg->flags & PG_CLEAN) == 0);
		/* an object with a dirty page should be dirty. */
		KASSERT(!UVM_OBJ_IS_CLEAN(uobj));
	}
	if (uobjpage->flags & PG_WANTED)
		wakeup(uobjpage);
	/* uobj still locked */
	uobjpage->flags &= ~(PG_WANTED|PG_BUSY);
	UVM_PAGE_OWN(uobjpage, NULL);

	uvm_lock_pageq();

	/*
	 * replace uobjpage with new page.
	 */

	uvm_pagereplace(uobjpage, pg);

	/*
	 * if the page is no longer referenced by
	 * an anon (i.e. we are breaking an O->K
	 * loan), then remove it from any pageq's.
	 */
	if (uobjpage->uanon == NULL)
		uvm_pagedequeue(uobjpage);

	/*
	 * at this point we have absolutely no
	 * control over uobjpage
	 */

	/* install new page */
	uvm_pageactivate(pg);
	uvm_unlock_pageq();

	/*
	 * done!  loan is broken and "pg" is
	 * PG_BUSY.   it can now replace uobjpage.
	 */

	return pg;
}

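/*
 * illustrative sketch: how a fault handler might use uvm_loanbreak()
 * when a write fault hits a loaned uobj page (loosely modeled on
 * uvm_fault(); the surrounding logic is elided and hypothetical).
 * on success the returned page is PG_BUSY and has already replaced
 * the loaned page in the object.
 *
 *	if (uobjpage->loan_count != 0) {
 *		pg = uvm_loanbreak(uobjpage);
 *		if (pg == NULL) {
 *			... unlock everything, uvm_wait(), then
 *			    retry the fault ...
 *		}
 *		uobjpage = pg;	... enter a writable mapping for it ...
 *	}
 */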