1 /*	$NetBSD: uvm_loan.c,v 1.83 2012/07/30 23:56:48 matt Exp $	*/
2 
3 /*
4  * Copyright (c) 1997 Charles D. Cranor and Washington University.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  *
27  * from: Id: uvm_loan.c,v 1.1.6.4 1998/02/06 05:08:43 chs Exp
28  */
29 
30 /*
31  * uvm_loan.c: page loanout handler
32  */
33 
34 #include <sys/cdefs.h>
35 __KERNEL_RCSID(0, "$NetBSD: uvm_loan.c,v 1.83 2012/07/30 23:56:48 matt Exp $");
36 
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/kernel.h>
40 #include <sys/mman.h>
41 
42 #include <uvm/uvm.h>
43 
44 #ifdef UVMHIST
45 UVMHIST_DEFINE(loanhist);
46 #endif
47 
48 /*
49  * "loaned" pages are pages which are (read-only, copy-on-write) loaned
50  * from the VM system to other parts of the kernel.   this allows page
51  * copying to be avoided (e.g. you can loan pages from objs/anons to
52  * the mbuf system).
53  *
54  * there are 3 types of loans possible:
55  *  O->K  uvm_object page to wired kernel page (e.g. mbuf data area)
56  *  A->K  anon page to wired kernel page (e.g. mbuf data area)
57  *  O->A  uvm_object to anon loan (e.g. vnode page to an anon)
58  * note that it is possible to have an O page loaned to both an A and K
59  * at the same time.
60  *
61  * loans are tracked by pg->loan_count.  an O->A page will have both
62  * a uvm_object and a vm_anon, but PQ_ANON will not be set.   this sort
63  * of page is considered "owned" by the uvm_object (not the anon).
64  *
65  * each loan of a page to the kernel bumps the pg->wire_count.  the
66  * kernel mappings for these pages will be read-only and wired.  since
67  * the page will also be wired, it will not be a candidate for pageout,
68  * and thus will never be pmap_page_protect()'d with VM_PROT_NONE.  a
69  * write fault in the kernel to one of these pages will not cause
70  * copy-on-write.  instead, the page fault is considered fatal.  this
71  * is because the kernel mapping will have no way to look up the
72  * object/anon which the page is owned by.  this is a good side-effect,
73  * since a kernel write to a loaned page is an error.
74  *
75  * owners that want to free their pages and discover that they are
76  * loaned out simply "disown" them (the page becomes an orphan).  these
77  * pages should be freed when the last loan is dropped.   in some cases
78  * an anon may "adopt" an orphaned page.
79  *
80  * locking: to read pg->loan_count either the owner or the page queues
81  * must be locked.   to modify pg->loan_count, both the owner of the page
82  * and the PQs must be locked.   pg->flags is (as always) locked by
83  * the owner of the page.
84  *
85  * note that locking from the "loaned" side is tricky since the object
86  * getting the loaned page has no reference to the page's owner and thus
87  * the owner could "die" at any time.   in order to prevent the owner
88  * from dying the page queues should be locked.   this forces us to sometimes
89  * use "try" locking.
90  *
91  * loans are typically broken by the following events:
92  *  1. user-level write fault to a loaned page
93  *  2. pageout of clean+inactive O->A loaned page
94  *  3. owner frees page (e.g. pager flush)
95  *
96  * note that loaning a page causes all mappings of the page to become
97  * read-only (via pmap_page_protect).   this could have an unexpected
98  * effect on normal "wired" pages if one is not careful (XXX).
99  */
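
/*
 * quick reference, restating the rules above (an informal summary,
 * not additional semantics):
 *
 *	pg->loan_count > 0		the page has outstanding loans
 *	pg->uobject && pg->uanon
 *	    && !(pg->pqflags & PQ_ANON)	O->A loan; the uvm_object still
 *					owns the page
 *	pg->uobject == NULL
 *	    && pg->uanon == NULL	orphaned (disowned) page; freed
 *					when the last loan is dropped
 */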
100 
101 /*
102  * local prototypes
103  */
104 
105 static int	uvm_loananon(struct uvm_faultinfo *, void ***,
106 			     int, struct vm_anon *);
107 static int	uvm_loanuobj(struct uvm_faultinfo *, void ***,
108 			     int, vaddr_t);
109 static int	uvm_loanzero(struct uvm_faultinfo *, void ***, int);
110 static void	uvm_unloananon(struct vm_anon **, int);
111 static void	uvm_unloanpage(struct vm_page **, int);
112 static int	uvm_loanpage(struct vm_page **, int);
113 
114 
115 /*
116  * inlines
117  */
118 
119 /*
120  * uvm_loanentry: loan out pages in a map entry (helper fn for uvm_loan())
121  *
122  * => "ufi" is the result of a successful map lookup (meaning that
123  *	on entry the map is locked by the caller)
124  * => we may unlock and then relock the map if needed (for I/O)
125  * => we put our output result in "output"
126  * => we always return with the map unlocked
127  * => possible return values:
128  *	-1 == error, map is unlocked
129  *	 0 == map relock error (try again!), map is unlocked
130  *	>0 == number of pages we loaned, map is unlocked
131  *
132  * NOTE: We can live with this being an inline, because it is only called
133  * from one place.
134  */
135 
136 static inline int
137 uvm_loanentry(struct uvm_faultinfo *ufi, void ***output, int flags)
138 {
139 	vaddr_t curaddr = ufi->orig_rvaddr;
140 	vsize_t togo = ufi->size;
141 	struct vm_aref *aref = &ufi->entry->aref;
142 	struct uvm_object *uobj = ufi->entry->object.uvm_obj;
143 	struct vm_anon *anon;
144 	int rv, result = 0;
145 
146 	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);
147 
148 	/*
149 	 * lock us the rest of the way down (we unlock before return)
150 	 */
151 	if (aref->ar_amap) {
152 		amap_lock(aref->ar_amap);
153 	}
154 
155 	/*
156 	 * loop until done
157 	 */
158 	while (togo) {
159 
160 		/*
161 		 * find the page we want.   check the anon layer first.
162 		 */
163 
164 		if (aref->ar_amap) {
165 			anon = amap_lookup(aref, curaddr - ufi->entry->start);
166 		} else {
167 			anon = NULL;
168 		}
169 
170 		/* locked: map, amap, uobj */
171 		if (anon) {
172 			rv = uvm_loananon(ufi, output, flags, anon);
173 		} else if (uobj) {
174 			rv = uvm_loanuobj(ufi, output, flags, curaddr);
175 		} else if (UVM_ET_ISCOPYONWRITE(ufi->entry)) {
176 			rv = uvm_loanzero(ufi, output, flags);
177 		} else {
178 			uvmfault_unlockall(ufi, aref->ar_amap, uobj);
179 			rv = -1;
180 		}
181 		/* locked: if (rv > 0) => map, amap, uobj  [o.w. unlocked] */
182 		KASSERT(rv > 0 || aref->ar_amap == NULL ||
183 		    !mutex_owned(aref->ar_amap->am_lock));
184 		KASSERT(rv > 0 || uobj == NULL ||
185 		    !mutex_owned(uobj->vmobjlock));
186 
187 		/* total failure */
188 		if (rv < 0) {
189 			UVMHIST_LOG(loanhist, "failure %d", rv, 0,0,0);
190 			return (-1);
191 		}
192 
193 		/* relock failed, need to do another lookup */
194 		if (rv == 0) {
195 			UVMHIST_LOG(loanhist, "relock failure %d", result,
196 			    0,0,0);
197 			return (result);
198 		}
199 
200 		/*
201 		 * got it... advance to next page
202 		 */
203 
204 		result++;
205 		togo -= PAGE_SIZE;
206 		curaddr += PAGE_SIZE;
207 	}
208 
209 	/*
210 	 * unlock what we locked, unlock the maps and return
211 	 */
212 
213 	if (aref->ar_amap) {
214 		amap_unlock(aref->ar_amap);
215 	}
216 	uvmfault_unlockmaps(ufi, false);
217 	UVMHIST_LOG(loanhist, "done %d", result, 0,0,0);
218 	return (result);
219 }
220 
221 /*
222  * normal functions
223  */
224 
225 /*
226  * uvm_loan: loan pages in a map out to anons or to the kernel
227  *
228  * => map should be unlocked
229  * => start and len should be multiples of PAGE_SIZE
230  * => result is either an array of anons or vm_pages (depending on flags)
231  * => flag values: UVM_LOAN_TOANON - loan to anons
232  *                 UVM_LOAN_TOPAGE - loan to wired kernel page
233  *    one and only one of these flags must be set!
234  * => returns 0 (success), or an appropriate error number
235  */
236 
237 int
238 uvm_loan(struct vm_map *map, vaddr_t start, vsize_t len, void *v, int flags)
239 {
240 	struct uvm_faultinfo ufi;
241 	void **result, **output;
242 	int rv, error;
243 
244 	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);
245 
246 	/*
247 	 * ensure that one and only one of the flags is set
248 	 */
249 
250 	KASSERT(((flags & UVM_LOAN_TOANON) == 0) ^
251 		((flags & UVM_LOAN_TOPAGE) == 0));
252 
253 	/*
254 	 * "output" is a pointer to the current place to put the loaned page.
255 	 */
256 
257 	result = v;
258 	output = &result[0];	/* start at the beginning ... */
259 
260 	/*
261 	 * while we've got pages to do
262 	 */
263 
264 	while (len > 0) {
265 
266 		/*
267 		 * fill in params for a call to uvmfault_lookup
268 		 */
269 
270 		ufi.orig_map = map;
271 		ufi.orig_rvaddr = start;
272 		ufi.orig_size = len;
273 
274 		/*
275 		 * do the lookup.   the only time this will fail is if we
276 		 * hit an unmapped region (an error)
277 		 */
278 
279 		if (!uvmfault_lookup(&ufi, false)) {
280 			error = ENOENT;
281 			goto fail;
282 		}
283 
284 		/*
285 		 * map now locked.  now do the loanout...
286 		 */
287 
288 		rv = uvm_loanentry(&ufi, &output, flags);
289 		if (rv < 0) {
290 			/* all unlocked due to error */
291 			error = EINVAL;
292 			goto fail;
293 		}
294 
295 		/*
296 		 * done!  the map is unlocked.  advance, if possible.
297 		 *
298 		 * XXXCDC: could be recoded to hold the map lock with
299 		 *	   smarter code (but it only happens on map entry
300 		 *	   boundaries, so it isn't that bad).
301 		 */
302 
303 		if (rv) {
304 			rv <<= PAGE_SHIFT;
305 			len -= rv;
306 			start += rv;
307 		}
308 	}
309 	UVMHIST_LOG(loanhist, "success", 0,0,0,0);
310 	return 0;
311 
312 fail:
313 	/*
314 	 * failed to complete loans.  drop any loans and return failure code.
315 	 * map is already unlocked.
316 	 */
317 
318 	if (output - result) {
319 		if (flags & UVM_LOAN_TOANON) {
320 			uvm_unloananon((struct vm_anon **)result,
321 			    output - result);
322 		} else {
323 			uvm_unloanpage((struct vm_page **)result,
324 			    output - result);
325 		}
326 	}
327 	UVMHIST_LOG(loanhist, "error %d", error,0,0,0);
328 	return (error);
329 }
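
/*
 * illustrative sketch, not part of the original source: a typical
 * O->K use of uvm_loan().  "map", "va" and the page count here are
 * hypothetical, and error recovery is elided.  "va" and the length
 * must be multiples of PAGE_SIZE.
 *
 *	struct vm_page *pgs[4];
 *	int error;
 *
 *	error = uvm_loan(map, va, 4 << PAGE_SHIFT, pgs, UVM_LOAN_TOPAGE);
 *	if (error == 0) {
 *		... use the pages; all mappings are read-only, and a
 *		... kernel write to a loaned page is a fatal error ...
 *		uvm_unloan(pgs, 4, UVM_LOAN_TOPAGE);
 *	}
 */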
330 
331 /*
332  * uvm_loananon: loan a page from an anon out
333  *
334  * => called with map, amap, uobj locked
335  * => return value:
336  *	-1 = fatal error, everything is unlocked, abort.
337  *	 0 = lookup in ufi went stale, everything unlocked, relookup and
338  *		try again
339  *	 1 = got it, everything still locked
340  */
341 
342 int
343 uvm_loananon(struct uvm_faultinfo *ufi, void ***output, int flags,
344     struct vm_anon *anon)
345 {
346 	struct vm_page *pg;
347 	int error;
348 
349 	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);
350 
351 	/*
352 	 * if we are loaning to "another" anon then it is easy: we just
353 	 * bump the reference count on the current anon and return a
354 	 * pointer to it (it becomes copy-on-write shared).
355 	 */
356 
357 	if (flags & UVM_LOAN_TOANON) {
358 		KASSERT(mutex_owned(anon->an_lock));
359 		pg = anon->an_page;
360 		if (pg && (pg->pqflags & PQ_ANON) != 0 && anon->an_ref == 1) {
361 			if (pg->wire_count > 0) {
362 				UVMHIST_LOG(loanhist, "->A wired %p", pg,0,0,0);
363 				uvmfault_unlockall(ufi,
364 				    ufi->entry->aref.ar_amap,
365 				    ufi->entry->object.uvm_obj);
366 				return (-1);
367 			}
368 			pmap_page_protect(pg, VM_PROT_READ);
369 		}
370 		anon->an_ref++;
371 		**output = anon;
372 		(*output)++;
373 		UVMHIST_LOG(loanhist, "->A done", 0,0,0,0);
374 		return (1);
375 	}
376 
377 	/*
378 	 * we are loaning to a kernel page.   we need to get the page
379 	 * resident so we can wire it.   uvmfault_anonget will handle
380 	 * this for us.
381 	 */
382 
383 	KASSERT(mutex_owned(anon->an_lock));
384 	error = uvmfault_anonget(ufi, ufi->entry->aref.ar_amap, anon);
385 
386 	/*
387 	 * if we were unable to get the anon, then uvmfault_anonget has
388 	 * unlocked everything and returned an error code.
389 	 */
390 
391 	if (error) {
392 		UVMHIST_LOG(loanhist, "error %d", error,0,0,0);
393 
394 		/* need to refault (i.e. refresh our lookup) ? */
395 		if (error == ERESTART) {
396 			return (0);
397 		}
398 
399 		/* "try again"?   sleep a bit and retry ... */
400 		if (error == EAGAIN) {
401 			kpause("loanagain", false, hz/2, NULL);
402 			return (0);
403 		}
404 
405 		/* otherwise flag it as an error */
406 		return (-1);
407 	}
408 
409 	/*
410 	 * we have the page and its owner locked: do the loan now.
411 	 */
412 
413 	pg = anon->an_page;
414 	mutex_enter(&uvm_pageqlock);
415 	if (pg->wire_count > 0) {
416 		mutex_exit(&uvm_pageqlock);
417 		UVMHIST_LOG(loanhist, "->K wired %p", pg,0,0,0);
418 		KASSERT(pg->uobject == NULL);
419 		uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, NULL);
420 		return (-1);
421 	}
422 	if (pg->loan_count == 0) {
423 		pmap_page_protect(pg, VM_PROT_READ);
424 	}
425 	pg->loan_count++;
426 	uvm_pageactivate(pg);
427 	mutex_exit(&uvm_pageqlock);
428 	**output = pg;
429 	(*output)++;
430 
431 	/* unlock and return success */
432 	if (pg->uobject)
433 		mutex_exit(pg->uobject->vmobjlock);
434 	UVMHIST_LOG(loanhist, "->K done", 0,0,0,0);
435 	return (1);
436 }
437 
438 /*
439  * uvm_loanpage: loan out pages to kernel (->K)
440  *
441  * => pages should be object-owned and the object should be locked.
442  * => in the case of error, the object might be unlocked and relocked.
443  * => caller should busy the pages beforehand.
444  * => pages will be unbusied.
445  * => fail with EBUSY if we meet a wired page.
446  */
447 static int
448 uvm_loanpage(struct vm_page **pgpp, int npages)
449 {
450 	int i;
451 	int error = 0;
452 
453 	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);
454 
455 	for (i = 0; i < npages; i++) {
456 		struct vm_page *pg = pgpp[i];
457 
458 		KASSERT(pg->uobject != NULL);
459 		KASSERT(pg->uobject == pgpp[0]->uobject);
460 		KASSERT(!(pg->flags & (PG_RELEASED|PG_PAGEOUT)));
461 		KASSERT(mutex_owned(pg->uobject->vmobjlock));
462 		KASSERT(pg->flags & PG_BUSY);
463 
464 		mutex_enter(&uvm_pageqlock);
465 		if (pg->wire_count > 0) {
466 			mutex_exit(&uvm_pageqlock);
467 			UVMHIST_LOG(loanhist, "wired %p", pg,0,0,0);
468 			error = EBUSY;
469 			break;
470 		}
471 		if (pg->loan_count == 0) {
472 			pmap_page_protect(pg, VM_PROT_READ);
473 		}
474 		pg->loan_count++;
475 		uvm_pageactivate(pg);
476 		mutex_exit(&uvm_pageqlock);
477 	}
478 
479 	uvm_page_unbusy(pgpp, npages);
480 
481 	if (error) {
482 		/*
483 		 * backout what we've done
484 		 */
485 		kmutex_t *slock = pgpp[0]->uobject->vmobjlock;
486 
487 		mutex_exit(slock);
488 		uvm_unloan(pgpp, i, UVM_LOAN_TOPAGE);
489 		mutex_enter(slock);
490 	}
491 
492 	UVMHIST_LOG(loanhist, "done %d", error,0,0,0);
493 	return error;
494 }
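
/*
 * sketch of the calling contract for uvm_loanpage(), restating the
 * comments above ("uobj", "pgpp" and "npages" are the caller's):
 *
 *	mutex_enter(uobj->vmobjlock);
 *	... obtain the pages and mark each pgpp[i] PG_BUSY ...
 *	error = uvm_loanpage(pgpp, npages);	(unbusies the pages)
 *	mutex_exit(uobj->vmobjlock);
 */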
495 
496 /*
497  * XXX UBC temp limit
498  * number of pages to get at once.
499  * should be <= MAX_READ_AHEAD in genfs_vnops.c
500  */
501 #define	UVM_LOAN_GET_CHUNK	16
502 
503 /*
504  * uvm_loanuobjpages: loan pages from a uobj out (O->K)
505  *
506  * => uobj shouldn't be locked.  (we'll lock it)
507  * => fail with EBUSY if we meet a wired page.
508  */
509 int
510 uvm_loanuobjpages(struct uvm_object *uobj, voff_t pgoff, int orignpages,
511     struct vm_page **origpgpp)
512 {
513 	int ndone; /* # of pages loaned out */
514 	struct vm_page **pgpp;
515 	int error;
516 	int i;
517 	kmutex_t *slock;
518 
519 	pgpp = origpgpp;
520 	for (ndone = 0; ndone < orignpages; ) {
521 		int npages;
522 		/* npendloan: # of pages busied but not loaned out yet. */
523 		int npendloan = 0xdead; /* XXX gcc */
524 reget:
525 		npages = MIN(UVM_LOAN_GET_CHUNK, orignpages - ndone);
526 		mutex_enter(uobj->vmobjlock);
527 		error = (*uobj->pgops->pgo_get)(uobj,
528 		    pgoff + (ndone << PAGE_SHIFT), pgpp, &npages, 0,
529 		    VM_PROT_READ, 0, PGO_SYNCIO);
530 		if (error == EAGAIN) {
531 			kpause("loanuopg", false, hz/2, NULL);
532 			continue;
533 		}
534 		if (error)
535 			goto fail;
536 
537 		KASSERT(npages > 0);
538 
539 		/* loan and unbusy pages */
540 		slock = NULL;
541 		for (i = 0; i < npages; i++) {
542 			kmutex_t *nextslock; /* slock for next page */
543 			struct vm_page *pg = *pgpp;
544 
545 			/* XXX assuming that the page is owned by uobj */
546 			KASSERT(pg->uobject != NULL);
547 			nextslock = pg->uobject->vmobjlock;
548 
549 			if (slock != nextslock) {
550 				if (slock) {
551 					KASSERT(npendloan > 0);
552 					error = uvm_loanpage(pgpp - npendloan,
553 					    npendloan);
554 					mutex_exit(slock);
555 					if (error)
556 						goto fail;
557 					ndone += npendloan;
558 					KASSERT(origpgpp + ndone == pgpp);
559 				}
560 				slock = nextslock;
561 				npendloan = 0;
562 				mutex_enter(slock);
563 			}
564 
565 			if ((pg->flags & PG_RELEASED) != 0) {
566 				/*
567 				 * release pages and try again.
568 				 */
569 				mutex_exit(slock);
570 				for (; i < npages; i++) {
571 					pg = pgpp[i];
572 					slock = pg->uobject->vmobjlock;
573 
574 					mutex_enter(slock);
575 					mutex_enter(&uvm_pageqlock);
576 					uvm_page_unbusy(&pg, 1);
577 					mutex_exit(&uvm_pageqlock);
578 					mutex_exit(slock);
579 				}
580 				goto reget;
581 			}
582 
583 			npendloan++;
584 			pgpp++;
585 			KASSERT(origpgpp + ndone + npendloan == pgpp);
586 		}
587 		KASSERT(slock != NULL);
588 		KASSERT(npendloan > 0);
589 		error = uvm_loanpage(pgpp - npendloan, npendloan);
590 		mutex_exit(slock);
591 		if (error)
592 			goto fail;
593 		ndone += npendloan;
594 		KASSERT(origpgpp + ndone == pgpp);
595 	}
596 
597 	return 0;
598 
599 fail:
600 	uvm_unloan(origpgpp, ndone, UVM_LOAN_TOPAGE);
601 
602 	return error;
603 }
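
/*
 * illustrative sketch (an assumption, not from this file): loaning
 * the first "npages" pages of a vnode's data out to the kernel, where
 * "vp" and "npages" are hypothetical and pgs[] has room for them all.
 *
 *	error = uvm_loanuobjpages(&vp->v_uobj, 0, npages, pgs);
 *	if (error == 0) {
 *		... use the wired, read-only pages ...
 *		uvm_unloan(pgs, npages, UVM_LOAN_TOPAGE);
 *	}
 */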
604 
605 /*
606  * uvm_loanuobj: loan a page from a uobj out
607  *
608  * => called with map, amap, uobj locked
609  * => return value:
610  *	-1 = fatal error, everything is unlocked, abort.
611  *	 0 = lookup in ufi went stale, everything unlocked, relookup and
612  *		try again
613  *	 1 = got it, everything still locked
614  */
615 
616 static int
617 uvm_loanuobj(struct uvm_faultinfo *ufi, void ***output, int flags, vaddr_t va)
618 {
619 	struct vm_amap *amap = ufi->entry->aref.ar_amap;
620 	struct uvm_object *uobj = ufi->entry->object.uvm_obj;
621 	struct vm_page *pg;
622 	int error, npages;
623 	bool locked;
624 
625 	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);
626 
627 	/*
628 	 * first we must make sure the page is resident.
629 	 *
630 	 * XXXCDC: duplicate code with uvm_fault().
631 	 */
632 
633 	/* locked: maps(read), amap(if there) */
634 	mutex_enter(uobj->vmobjlock);
635 	/* locked: maps(read), amap(if there), uobj */
636 
637 	if (uobj->pgops->pgo_get) {	/* try locked pgo_get */
638 		npages = 1;
639 		pg = NULL;
640 		error = (*uobj->pgops->pgo_get)(uobj,
641 		    va - ufi->entry->start + ufi->entry->offset,
642 		    &pg, &npages, 0, VM_PROT_READ, MADV_NORMAL, PGO_LOCKED);
643 	} else {
644 		error = EIO;		/* must have pgo_get op */
645 	}
646 
647 	/*
648 	 * check the result of the locked pgo_get.  if there is a problem,
649 	 * then we fail the loan.
650 	 */
651 
652 	if (error && error != EBUSY) {
653 		uvmfault_unlockall(ufi, amap, uobj);
654 		return (-1);
655 	}
656 
657 	/*
658 	 * if we need to unlock for I/O, do so now.
659 	 */
660 
661 	if (error == EBUSY) {
662 		uvmfault_unlockall(ufi, amap, NULL);
663 
664 		/* locked: uobj */
665 		npages = 1;
666 		error = (*uobj->pgops->pgo_get)(uobj,
667 		    va - ufi->entry->start + ufi->entry->offset,
668 		    &pg, &npages, 0, VM_PROT_READ, MADV_NORMAL, PGO_SYNCIO);
669 		/* locked: <nothing> */
670 
671 		if (error) {
672 			if (error == EAGAIN) {
673 				kpause("fltagain2", false, hz/2, NULL);
674 				return (0);
675 			}
676 			return (-1);
677 		}
678 
679 		/*
680 		 * pgo_get was a success.   attempt to relock everything.
681 		 */
682 
683 		locked = uvmfault_relock(ufi);
684 		if (locked && amap)
685 			amap_lock(amap);
686 		uobj = pg->uobject;
687 		mutex_enter(uobj->vmobjlock);
688 
689 		/*
690 		 * verify that the page has not been released and re-verify
691 		 * that the amap slot is still free.   if there is a problem,
692 		 * we drop our lock (thus forcing a lookup refresh/retry).
693 		 */
694 
695 		if ((pg->flags & PG_RELEASED) != 0 ||
696 		    (locked && amap && amap_lookup(&ufi->entry->aref,
697 		    ufi->orig_rvaddr - ufi->entry->start))) {
698 			if (locked)
699 				uvmfault_unlockall(ufi, amap, NULL);
700 			locked = false;
701 		}
702 
703 		/*
704 		 * didn't get the lock?   release the page and retry.
705 		 */
706 
707 		if (locked == false) {
708 			if (pg->flags & PG_WANTED) {
709 				wakeup(pg);
710 			}
711 			if (pg->flags & PG_RELEASED) {
712 				mutex_enter(&uvm_pageqlock);
713 				uvm_pagefree(pg);
714 				mutex_exit(&uvm_pageqlock);
715 				mutex_exit(uobj->vmobjlock);
716 				return (0);
717 			}
718 			mutex_enter(&uvm_pageqlock);
719 			uvm_pageactivate(pg);
720 			mutex_exit(&uvm_pageqlock);
721 			pg->flags &= ~(PG_BUSY|PG_WANTED);
722 			UVM_PAGE_OWN(pg, NULL);
723 			mutex_exit(uobj->vmobjlock);
724 			return (0);
725 		}
726 	}
727 
728 	KASSERT(uobj == pg->uobject);
729 
730 	/*
731 	 * at this point we have the page we want ("pg") marked PG_BUSY for us
732 	 * and we have all data structures locked.  do the loanout.  page can
733 	 * not be PG_RELEASED (we caught this above).
734 	 */
735 
736 	if ((flags & UVM_LOAN_TOANON) == 0) {
737 		if (uvm_loanpage(&pg, 1)) {
738 			uvmfault_unlockall(ufi, amap, uobj);
739 			return (-1);
740 		}
741 		mutex_exit(uobj->vmobjlock);
742 		**output = pg;
743 		(*output)++;
744 		return (1);
745 	}
746 
747 #ifdef notdef	/* XXX: dead code; "anon" is no longer declared above */
748 	/*
749 	 * must be a loan to an anon.   check to see if there is already
750 	 * an anon associated with this page.  if so, then just return
751 	 * a reference to that anon.   the page should already be
752 	 * mapped read-only because it is already on loan.
753 	 */
754 
755 	if (pg->uanon) {
756 		/* XXX: locking */
757 		anon = pg->uanon;
758 		anon->an_ref++;
759 		if (pg->flags & PG_WANTED) {
760 			wakeup(pg);
761 		}
762 		pg->flags &= ~(PG_WANTED|PG_BUSY);
763 		UVM_PAGE_OWN(pg, NULL);
764 		mutex_exit(uobj->vmobjlock);
765 		**output = anon;
766 		(*output)++;
767 		return (1);
768 	}
769 
770 	/*
771 	 * need to allocate a new anon
772 	 */
773 
774 	anon = uvm_analloc();
775 	if (anon == NULL) {
776 		goto fail;
777 	}
778 	mutex_enter(&uvm_pageqlock);
779 	if (pg->wire_count > 0) {
780 		mutex_exit(&uvm_pageqlock);
781 		UVMHIST_LOG(loanhist, "wired %p", pg,0,0,0);
782 		goto fail;
783 	}
784 	if (pg->loan_count == 0) {
785 		pmap_page_protect(pg, VM_PROT_READ);
786 	}
787 	pg->loan_count++;
788 	pg->uanon = anon;
789 	anon->an_page = pg;
790 	anon->an_lock = amap->am_lock;	/* XXX assumed; TODO: share amap lock */
791 	uvm_pageactivate(pg);
792 	mutex_exit(&uvm_pageqlock);
793 	if (pg->flags & PG_WANTED) {
794 		wakeup(pg);
795 	}
796 	pg->flags &= ~(PG_WANTED|PG_BUSY);
797 	UVM_PAGE_OWN(pg, NULL);
798 	mutex_exit(uobj->vmobjlock);
799 	mutex_exit(anon->an_lock);
800 	**output = anon;
801 	(*output)++;
802 	return (1);
803 
804 fail:
805 	UVMHIST_LOG(loanhist, "fail", 0,0,0,0);
806 	/*
807 	 * unlock everything and bail out.
808 	 */
809 	if (pg->flags & PG_WANTED) {
810 		wakeup(pg);
811 	}
812 	pg->flags &= ~(PG_WANTED|PG_BUSY);
813 	UVM_PAGE_OWN(pg, NULL);
814 	uvmfault_unlockall(ufi, amap, uobj);
815 	if (anon) {
816 		anon->an_ref--;
817 		uvm_anon_free(anon);
818 	}
819 #endif	/* notdef */
820 	return (-1);
821 }
822 
823 /*
824  * uvm_loanzero: loan a zero-fill page out
825  *
826  * => called with map, amap, uobj locked
827  * => return value:
828  *	-1 = fatal error, everything is unlocked, abort.
829  *	 0 = lookup in ufi went stale, everything unlocked, relookup and
830  *		try again
831  *	 1 = got it, everything still locked
832  */
833 
834 static struct uvm_object uvm_loanzero_object;
835 static kmutex_t uvm_loanzero_lock;
836 
837 static int
838 uvm_loanzero(struct uvm_faultinfo *ufi, void ***output, int flags)
839 {
840 	struct vm_page *pg;
841 	struct vm_amap *amap = ufi->entry->aref.ar_amap;
842 
843 	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);
844 again:
845 	mutex_enter(uvm_loanzero_object.vmobjlock);
846 
847 	/*
848 	 * first, get ahold of our single zero page.
849 	 */
850 
851 	if (__predict_false((pg =
852 			     TAILQ_FIRST(&uvm_loanzero_object.memq)) == NULL)) {
853 		while ((pg = uvm_pagealloc(&uvm_loanzero_object, 0, NULL,
854 					   UVM_PGA_ZERO)) == NULL) {
855 			mutex_exit(uvm_loanzero_object.vmobjlock);
856 			uvmfault_unlockall(ufi, amap, NULL);
857 			uvm_wait("loanzero");
858 			if (!uvmfault_relock(ufi)) {
859 				return (0);
860 			}
861 			if (amap) {
862 				amap_lock(amap);
863 			}
864 			goto again;
865 		}
866 
867 		/* got a zero'd page. */
868 		pg->flags &= ~(PG_WANTED|PG_BUSY|PG_FAKE);
869 		pg->flags |= PG_RDONLY;
870 		mutex_enter(&uvm_pageqlock);
871 		uvm_pageactivate(pg);
872 		mutex_exit(&uvm_pageqlock);
873 		UVM_PAGE_OWN(pg, NULL);
874 	}
875 
876 	if ((flags & UVM_LOAN_TOANON) == 0) {	/* loaning to a kernel page */
877 		mutex_enter(&uvm_pageqlock);
878 		pg->loan_count++;
879 		mutex_exit(&uvm_pageqlock);
880 		mutex_exit(uvm_loanzero_object.vmobjlock);
881 		**output = pg;
882 		(*output)++;
883 		return (1);
884 	}
885 
886 #ifdef notdef	/* XXX: dead code; "anon" is not declared above */
887 	/*
888 	 * loaning to an anon.  check to see if there is already an anon
889 	 * associated with this page.  if so, then just return a reference
890 	 * to that anon.
891 	 */
892 
893 	if (pg->uanon) {
894 		anon = pg->uanon;
895 		mutex_enter(anon->an_lock);
896 		anon->an_ref++;
897 		mutex_exit(anon->an_lock);
898 		mutex_exit(uvm_loanzero_object.vmobjlock);
899 		**output = anon;
900 		(*output)++;
901 		return (1);
902 	}
903 
904 	/*
905 	 * need to allocate a new anon
906 	 */
907 
908 	anon = uvm_analloc();
909 	if (anon == NULL) {
910 		/* out of swap causes us to fail */
911 		mutex_exit(uvm_loanzero_object.vmobjlock);
912 		uvmfault_unlockall(ufi, amap, NULL);
913 		return (-1);
914 	}
915 	anon->an_page = pg;
916 	pg->uanon = anon;
917 	mutex_enter(&uvm_pageqlock);
918 	pg->loan_count++;
919 	uvm_pageactivate(pg);
920 	mutex_exit(&uvm_pageqlock);
921 	mutex_exit(anon->an_lock);
922 	mutex_exit(uvm_loanzero_object.vmobjlock);
923 	**output = anon;
924 	(*output)++;
925 	return (1);
926 #else
927 	return (-1);
928 #endif
929 }
930 
931 
932 /*
933  * uvm_unloananon: kill loans on anons (basically a normal ref drop)
934  *
935  * => we expect all our resources to be unlocked
936  */
937 
938 static void
939 uvm_unloananon(struct vm_anon **aloans, int nanons)
940 {
941 #ifdef notdef	/* XXX: dead code; "amap" is not defined in this scope */
942 	struct vm_anon *anon, *to_free = NULL;
943 
944 	/* TODO: locking */
945 	amap_lock(amap);
946 	while (nanons-- > 0) {
947 		anon = *aloans++;
948 		if (--anon->an_ref == 0) {
949 			anon->an_link = to_free;
950 			to_free = anon;
951 		}
952 	}
953 	uvm_anon_freelst(amap, to_free);
954 #endif	/* notdef */
955 }
956 
957 /*
958  * uvm_unloanpage: kill loans on pages loaned out to the kernel
959  *
960  * => we expect all our resources to be unlocked
961  */
962 
963 static void
964 uvm_unloanpage(struct vm_page **ploans, int npages)
965 {
966 	struct vm_page *pg;
967 	kmutex_t *slock;
968 
969 	mutex_enter(&uvm_pageqlock);
970 	while (npages-- > 0) {
971 		pg = *ploans++;
972 
973 		/*
974 		 * do a little dance to acquire the object or anon lock
975 		 * as appropriate.  we are locking in the wrong order,
976 		 * so we have to do a try-lock here.
977 		 */
978 
979 		slock = NULL;
980 		while (pg->uobject != NULL || pg->uanon != NULL) {
981 			if (pg->uobject != NULL) {
982 				slock = pg->uobject->vmobjlock;
983 			} else {
984 				slock = pg->uanon->an_lock;
985 			}
986 			if (mutex_tryenter(slock)) {
987 				break;
988 			}
989 			/* XXX Better than yielding but inadequate. */
990 			kpause("livelock", false, 1, &uvm_pageqlock);
991 			slock = NULL;
992 		}
993 
994 		/*
995 		 * drop our loan.  if page is owned by an anon but
996 		 * PQ_ANON is not set, the page was loaned to the anon
997 		 * from an object which dropped ownership, so resolve
998 		 * this by turning the anon's loan into real ownership
999 		 * (ie. decrement loan_count again and set PQ_ANON).
1000 		 * after all this, if there are no loans left, put the
1001 		 * page back on a paging queue (if the page is owned by
1002 		 * an anon) or free it (if the page is now unowned).
1003 		 */
1004 
1005 		KASSERT(pg->loan_count > 0);
1006 		pg->loan_count--;
1007 		if (pg->uobject == NULL && pg->uanon != NULL &&
1008 		    (pg->pqflags & PQ_ANON) == 0) {
1009 			KASSERT(pg->loan_count > 0);
1010 			pg->loan_count--;
1011 			pg->pqflags |= PQ_ANON;
1012 		}
1013 		if (pg->loan_count == 0 && pg->uobject == NULL &&
1014 		    pg->uanon == NULL) {
1015 			KASSERT((pg->flags & PG_BUSY) == 0);
1016 			uvm_pagefree(pg);
1017 		}
1018 		if (slock != NULL) {
1019 			mutex_exit(slock);
1020 		}
1021 	}
1022 	mutex_exit(&uvm_pageqlock);
1023 }
1024 
1025 /*
1026  * uvm_unloan: kill loans on pages or anons.
1027  */
1028 
1029 void
1030 uvm_unloan(void *v, int npages, int flags)
1031 {
1032 	if (flags & UVM_LOAN_TOANON) {
1033 		uvm_unloananon(v, npages);
1034 	} else {
1035 		uvm_unloanpage(v, npages);
1036 	}
1037 }
1038 
1039 /*
1040  * Minimal pager for uvm_loanzero_object.  We need to provide a "put"
1041  * method, because the page can end up on a paging queue, and the
1042  * page daemon will want to call pgo_put when it encounters the page
1043  * on the inactive list.
1044  */
1045 
1046 static int
1047 ulz_put(struct uvm_object *uobj, voff_t start, voff_t stop, int flags)
1048 {
1049 	struct vm_page *pg;
1050 
1051 	KDASSERT(uobj == &uvm_loanzero_object);
1052 
1053 	/*
1054 	 * Don't need to do any work here if we're not freeing pages.
1055 	 */
1056 
1057 	if ((flags & PGO_FREE) == 0) {
1058 		mutex_exit(uobj->vmobjlock);
1059 		return 0;
1060 	}
1061 
1062 	/*
1063 	 * we don't actually want to ever free the uvm_loanzero_page, so
1064 	 * just reactivate or dequeue it.
1065 	 */
1066 
1067 	pg = TAILQ_FIRST(&uobj->memq);
1068 	KASSERT(pg != NULL);
1069 	KASSERT(TAILQ_NEXT(pg, listq.queue) == NULL);
1070 
1071 	mutex_enter(&uvm_pageqlock);
1072 	if (pg->uanon)
1073 		uvm_pageactivate(pg);
1074 	else
1075 		uvm_pagedequeue(pg);
1076 	mutex_exit(&uvm_pageqlock);
1077 
1078 	mutex_exit(uobj->vmobjlock);
1079 	return 0;
1080 }
1081 
1082 static const struct uvm_pagerops ulz_pager = {
1083 	.pgo_put = ulz_put,
1084 };
1085 
1086 /*
1087  * uvm_loan_init(): initialize the uvm_loan() facility.
1088  */
1089 
1090 void
1091 uvm_loan_init(void)
1092 {
1093 
1094 	mutex_init(&uvm_loanzero_lock, MUTEX_DEFAULT, IPL_NONE);
1095 	uvm_obj_init(&uvm_loanzero_object, &ulz_pager, false, 0);
1096 	uvm_obj_setlock(&uvm_loanzero_object, &uvm_loanzero_lock);
1097 
1098 	UVMHIST_INIT(loanhist, 300);
1099 }
1100 
1101 /*
1102  * uvm_loanbreak: break loan on a uobj page
1103  *
1104  * => called with uobj locked
1105  * => the page should be busy
1106  * => return value:
1107  *	the newly allocated page on success, NULL on failure
1108  */
1109 struct vm_page *
1110 uvm_loanbreak(struct vm_page *uobjpage)
1111 {
1112 	struct vm_page *pg;
1113 #ifdef DIAGNOSTIC
1114 	struct uvm_object *uobj = uobjpage->uobject;
1115 #endif
1116 
1117 	KASSERT(uobj != NULL);
1118 	KASSERT(mutex_owned(uobj->vmobjlock));
1119 	KASSERT(uobjpage->flags & PG_BUSY);
1120 
1121 	/* alloc new un-owned page */
1122 	pg = uvm_pagealloc(NULL, 0, NULL, 0);
1123 	if (pg == NULL)
1124 		return NULL;
1125 
1126 	/*
1127 	 * copy the data from the old page to the new
1128 	 * one and clear the fake flags on the new page (keep it busy).
1129 	 * force a reload of the old page by clearing it from all
1130 	 * pmaps.
1131 	 * transfer dirtiness of the old page to the new page.
1132 	 * then lock the page queues to rename the pages.
1133 	 */
1134 
1135 	uvm_pagecopy(uobjpage, pg);	/* old -> new */
1136 	pg->flags &= ~PG_FAKE;
1137 	pmap_page_protect(uobjpage, VM_PROT_NONE);
1138 	if ((uobjpage->flags & PG_CLEAN) != 0 && !pmap_clear_modify(uobjpage)) {
1139 		pmap_clear_modify(pg);
1140 		pg->flags |= PG_CLEAN;
1141 	} else {
1142 		/* uvm_pagecopy marked it dirty */
1143 		KASSERT((pg->flags & PG_CLEAN) == 0);
1144 		/* an object with a dirty page should be dirty. */
1145 		KASSERT(!UVM_OBJ_IS_CLEAN(uobj));
1146 	}
1147 	if (uobjpage->flags & PG_WANTED)
1148 		wakeup(uobjpage);
1149 	/* uobj still locked */
1150 	uobjpage->flags &= ~(PG_WANTED|PG_BUSY);
1151 	UVM_PAGE_OWN(uobjpage, NULL);
1152 
1153 	mutex_enter(&uvm_pageqlock);
1154 
1155 	/*
1156 	 * replace uobjpage with new page.
1157 	 */
1158 
1159 	uvm_pagereplace(uobjpage, pg);
1160 
1161 	/*
1162 	 * if the page is no longer referenced by
1163 	 * an anon (i.e. we are breaking an O->K
1164 	 * loan), then remove it from any pageq's.
1165 	 */
1166 	if (uobjpage->uanon == NULL)
1167 		uvm_pagedequeue(uobjpage);
1168 
1169 	/*
1170 	 * at this point we have absolutely no
1171 	 * control over uobjpage
1172 	 */
1173 
1174 	/* install new page */
1175 	uvm_pageactivate(pg);
1176 	mutex_exit(&uvm_pageqlock);
1177 
1178 	/*
1179 	 * done!  the loan is broken and "pg" is PG_BUSY.   the caller
1180 	 * can now use it in place of uobjpage.
1181 	 */
1182 
1183 	return pg;
1184 }
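
/*
 * illustrative sketch (an assumption, modelled on the fault code): the
 * caller holds the object locked and "uobjpage" busy, and wants its
 * own copy of the page before permitting a write.
 *
 *	if (uobjpage->loan_count != 0) {
 *		pg = uvm_loanbreak(uobjpage);
 *		if (pg == NULL) {
 *			... unlock everything, wait for memory with
 *			... uvm_wait(), and retry the fault ...
 *		}
 *		uobjpage = pg;		(pg is PG_BUSY and loan-free)
 *	}
 */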
1185 
1186 int
1187 uvm_loanbreak_anon(struct vm_anon *anon, struct uvm_object *uobj)
1188 {
1189 	struct vm_page *pg;
1190 
1191 	KASSERT(mutex_owned(anon->an_lock));
1192 	KASSERT(uobj == NULL || mutex_owned(uobj->vmobjlock));
1193 
1194 	/* get new un-owned replacement page */
1195 	pg = uvm_pagealloc(NULL, 0, NULL, 0);
1196 	if (pg == NULL) {
1197 		return ENOMEM;
1198 	}
1199 
1200 	/* copy old -> new */
1201 	uvm_pagecopy(anon->an_page, pg);
1202 
1203 	/* force reload */
1204 	pmap_page_protect(anon->an_page, VM_PROT_NONE);
1205 	mutex_enter(&uvm_pageqlock);	  /* KILL loan */
1206 
1207 	anon->an_page->uanon = NULL;
1208 	/* in case the anon owned the page */
1209 	anon->an_page->pqflags &= ~PQ_ANON;
1210 
1211 	if (uobj) {
1212 		/* if we were receiver of loan */
1213 		anon->an_page->loan_count--;
1214 	} else {
1215 		/*
1216 		 * we were the lender (A->K); need to remove the page from
1217 		 * pageq's.
1218 		 */
1219 		uvm_pagedequeue(anon->an_page);
1220 	}
1221 
1222 	if (uobj) {
1223 		mutex_exit(uobj->vmobjlock);
1224 	}
1225 
1226 	/* install new page in anon */
1227 	anon->an_page = pg;
1228 	pg->uanon = anon;
1229 	pg->pqflags |= PQ_ANON;
1230 
1231 	uvm_pageactivate(pg);
1232 	mutex_exit(&uvm_pageqlock);
1233 
1234 	pg->flags &= ~(PG_BUSY|PG_FAKE);
1235 	UVM_PAGE_OWN(pg, NULL);
1236 
1237 	/* done! */
1238 
1239 	return 0;
1240 }
1241
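/*
 * illustrative sketch (an assumption; mirrors the anon fault path):
 * breaking a loan before a write to a loaned anon page.  "anon" is
 * locked by the caller; "uobj" is the page's owning object if the
 * anon is the borrower of an O->A loan, or NULL if the anon owns the
 * page itself.
 *
 *	if (anon->an_page->loan_count != 0) {
 *		error = uvm_loanbreak_anon(anon, uobj);
 *		if (error != 0) {
 *			... unlock, wait for memory, and retry ...
 *		}
 *	}
 */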