/*	$NetBSD: uvm_loan.c,v 1.87 2018/05/25 20:11:03 jdolecek Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * from: Id: uvm_loan.c,v 1.1.6.4 1998/02/06 05:08:43 chs Exp
 */

/*
 * uvm_loan.c: page loanout handler
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_loan.c,v 1.87 2018/05/25 20:11:03 jdolecek Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/mman.h>

#include <uvm/uvm.h>

#ifdef UVMHIST
UVMHIST_DEFINE(loanhist);
#endif

/*
 * "loaned" pages are pages which are (read-only, copy-on-write) loaned
 * from the VM system to other parts of the kernel.   this allows page
 * copying to be avoided (e.g. you can loan pages from objs/anons to
 * the mbuf system).
 *
 * there are 3 types of loans possible:
 *  O->K  uvm_object page to wired kernel page (e.g. mbuf data area)
 *  A->K  anon page to wired kernel page (e.g. mbuf data area)
 *  O->A  uvm_object to anon loan (e.g. vnode page to an anon)
 * note that it is possible to have an O page loaned to both an A and K
 * at the same time.
 *
 * loans are tracked by pg->loan_count.  an O->A page will have both
 * a uvm_object and a vm_anon, but PQ_ANON will not be set.   this sort
 * of page is considered "owned" by the uvm_object (not the anon).
 *
 * each loan of a page to the kernel bumps the pg->wire_count.  the
 * kernel mappings for these pages will be read-only and wired.  since
 * the page will also be wired, it will not be a candidate for pageout,
 * and thus will never be pmap_page_protect()'d with VM_PROT_NONE.  a
 * write fault in the kernel to one of these pages will not cause
 * copy-on-write.  instead, the page fault is considered fatal.  this
 * is because the kernel mapping will have no way to look up the
 * object/anon which the page is owned by.  this is a good side-effect,
 * since a kernel write to a loaned page is an error.
 *
 * owners that want to free their pages and discover that they are
 * loaned out simply "disown" them (the page becomes an orphan).  these
 * pages should be freed when the last loan is dropped.   in some cases
 * an anon may "adopt" an orphaned page.
 *
 * locking: to read pg->loan_count either the owner or the page queues
 * must be locked.   to modify pg->loan_count, both the owner of the page
 * and the PQs must be locked.   pg->flags is (as always) locked by
 * the owner of the page.
 *
 * note that locking from the "loaned" side is tricky since the object
 * getting the loaned page has no reference to the page's owner and thus
 * the owner could "die" at any time.   in order to prevent the owner
 * from dying the page queues should be locked.   this forces us to sometimes
 * use "try" locking.
 *
 * loans are typically broken by the following events:
 *  1. user-level write fault to a loaned page
 *  2. pageout of clean+inactive O->A loaned page
 *  3. owner frees page (e.g. pager flush)
 *
 * note that loaning a page causes all mappings of the page to become
 * read-only (via pmap_page_protect).   this could have an unexpected
 * effect on normal "wired" pages if one is not careful (XXX).
 */
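
/*
 * an illustrative sketch of the ownership rules described above
 * (editorial pseudocode, not code used by this file):
 *
 *	if (pg->uobject != NULL)	the uvm_object owns pg
 *	else if (pg->uanon != NULL)	the anon owns pg (PQ_ANON set;
 *					if clear, the anon is adopting
 *					an orphan, see uvm_unloanpage)
 *	else				pg is an orphan (loans only)
 *
 * an O->A loaned page has both pg->uobject and pg->uanon set, so the
 * first test applies: the object owns it and PQ_ANON stays clear.
 */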

/*
 * local prototypes
 */

static int	uvm_loananon(struct uvm_faultinfo *, void ***,
			     int, struct vm_anon *);
static int	uvm_loanuobj(struct uvm_faultinfo *, void ***,
			     int, vaddr_t);
static int	uvm_loanzero(struct uvm_faultinfo *, void ***, int);
static void	uvm_unloananon(struct vm_anon **, int);
static void	uvm_unloanpage(struct vm_page **, int);
static int	uvm_loanpage(struct vm_page **, int);


/*
 * inlines
 */

/*
 * uvm_loanentry: loan out pages in a map entry (helper fn for uvm_loan())
 *
 * => "ufi" is the result of a successful map lookup (meaning that
 *	on entry the map is locked by the caller)
 * => we may unlock and then relock the map if needed (for I/O)
 * => we put our output result in "output"
 * => we always return with the map unlocked
 * => possible return values:
 *	-1 == error, map is unlocked
 *	 0 == map relock error (try again!), map is unlocked
 *	>0 == number of pages we loaned, map is unlocked
 *
 * NOTE: We can live with this being an inline, because it is only called
 * from one place.
 */

static inline int
uvm_loanentry(struct uvm_faultinfo *ufi, void ***output, int flags)
{
	vaddr_t curaddr = ufi->orig_rvaddr;
	vsize_t togo = ufi->size;
	struct vm_aref *aref = &ufi->entry->aref;
	struct uvm_object *uobj = ufi->entry->object.uvm_obj;
	struct vm_anon *anon;
	int rv, result = 0;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	/*
	 * lock us the rest of the way down (we unlock before return)
	 */
	if (aref->ar_amap) {
		amap_lock(aref->ar_amap);
	}

	/*
	 * loop until done
	 */
	while (togo) {

		/*
		 * find the page we want.   check the anon layer first.
		 */

		if (aref->ar_amap) {
			anon = amap_lookup(aref, curaddr - ufi->entry->start);
		} else {
			anon = NULL;
		}

		/* locked: map, amap, uobj */
		if (anon) {
			rv = uvm_loananon(ufi, output, flags, anon);
		} else if (uobj) {
			rv = uvm_loanuobj(ufi, output, flags, curaddr);
		} else if (UVM_ET_ISCOPYONWRITE(ufi->entry)) {
			rv = uvm_loanzero(ufi, output, flags);
		} else {
			uvmfault_unlockall(ufi, aref->ar_amap, uobj);
			rv = -1;
		}
		/* locked: if (rv > 0) => map, amap, uobj  [o.w. unlocked] */
		KASSERT(rv > 0 || aref->ar_amap == NULL ||
		    !mutex_owned(aref->ar_amap->am_lock));
		KASSERT(rv > 0 || uobj == NULL ||
		    !mutex_owned(uobj->vmobjlock));

		/* total failure */
		if (rv < 0) {
			UVMHIST_LOG(loanhist, "failure %jd", rv, 0,0,0);
			return (-1);
		}

		/* relock failed, need to do another lookup */
		if (rv == 0) {
			UVMHIST_LOG(loanhist, "relock failure %jd", result
			    ,0,0,0);
			return (result);
		}

		/*
		 * got it... advance to next page
		 */

		result++;
		togo -= PAGE_SIZE;
		curaddr += PAGE_SIZE;
	}

	/*
	 * unlock what we locked, unlock the maps and return
	 */

	if (aref->ar_amap) {
		amap_unlock(aref->ar_amap);
	}
	uvmfault_unlockmaps(ufi, false);
	UVMHIST_LOG(loanhist, "done %jd", result, 0,0,0);
	return (result);
}

/*
 * normal functions
 */

/*
 * uvm_loan: loan pages in a map out to anons or to the kernel
 *
 * => map should be unlocked
 * => start and len should be multiples of PAGE_SIZE
 * => result is either an array of anon's or vm_pages (depending on flags)
 * => flag values: UVM_LOAN_TOANON - loan to anons
 *                 UVM_LOAN_TOPAGE - loan to wired kernel page
 *    one and only one of these flags must be set!
 * => returns 0 (success), or an appropriate error number
 */

int
uvm_loan(struct vm_map *map, vaddr_t start, vsize_t len, void *v, int flags)
{
	struct uvm_faultinfo ufi;
	void **result, **output;
	int rv, error;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	/*
	 * ensure that one and only one of the flags is set
	 */

	KASSERT(((flags & UVM_LOAN_TOANON) == 0) ^
		((flags & UVM_LOAN_TOPAGE) == 0));

	/*
	 * "output" is a pointer to the current place to put the loaned page.
	 */

	result = v;
	output = &result[0];	/* start at the beginning ... */

	/*
	 * while we've got pages to do
	 */

	while (len > 0) {

		/*
		 * fill in params for a call to uvmfault_lookup
		 */

		ufi.orig_map = map;
		ufi.orig_rvaddr = start;
		ufi.orig_size = len;

		/*
		 * do the lookup, the only time this will fail is if we hit on
		 * an unmapped region (an error)
		 */

		if (!uvmfault_lookup(&ufi, false)) {
			error = ENOENT;
			goto fail;
		}

		/*
		 * map now locked.  now do the loanout...
		 */

		rv = uvm_loanentry(&ufi, &output, flags);
		if (rv < 0) {
			/* all unlocked due to error */
			error = EINVAL;
			goto fail;
		}

		/*
		 * done!  the map is unlocked.  advance, if possible.
		 *
		 * XXXCDC: could be recoded to hold the map lock with
		 *	   smarter code (but it only happens on map entry
		 *	   boundaries, so it isn't that bad).
		 */

		if (rv) {
			rv <<= PAGE_SHIFT;
			len -= rv;
			start += rv;
		}
	}
	UVMHIST_LOG(loanhist, "success", 0,0,0,0);
	return 0;

fail:
	/*
	 * failed to complete loans.  drop any loans and return failure code.
	 * map is already unlocked.
	 */

	if (output - result) {
		if (flags & UVM_LOAN_TOANON) {
			uvm_unloananon((struct vm_anon **)result,
			    output - result);
		} else {
			uvm_unloanpage((struct vm_page **)result,
			    output - result);
		}
	}
	UVMHIST_LOG(loanhist, "error %jd", error,0,0,0);
	return (error);
}
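
/*
 * an illustrative sketch of a hypothetical caller of uvm_loan() above
 * (not code from this file; "uva", "len" and "npgs" are placeholder
 * names for a page-aligned user range):
 *
 *	struct vm_page *pgs[npgs];
 *	int error;
 *
 *	error = uvm_loan(&curproc->p_vmspace->vm_map, uva, len,
 *	    pgs, UVM_LOAN_TOPAGE);
 *	if (error)
 *		return error;
 *	...use the pages; they are wired and may only be mapped read-only...
 *	uvm_unloan(pgs, len >> PAGE_SHIFT, UVM_LOAN_TOPAGE);
 *
 * on failure uvm_loan() backs out any partial loans itself, so the
 * caller only unloans after success.
 */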

/*
 * uvm_loananon: loan a page from an anon out
 *
 * => called with map, amap, uobj locked
 * => return value:
 *	-1 = fatal error, everything is unlocked, abort.
 *	 0 = lookup in ufi went stale, everything unlocked, relookup and
 *		try again
 *	 1 = got it, everything still locked
 */

int
uvm_loananon(struct uvm_faultinfo *ufi, void ***output, int flags,
    struct vm_anon *anon)
{
	struct vm_page *pg;
	int error;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	/*
	 * if we are loaning to "another" anon then it is easy, we just
	 * bump the reference count on the current anon and return a
	 * pointer to it (it becomes copy-on-write shared).
	 */

	if (flags & UVM_LOAN_TOANON) {
		KASSERT(mutex_owned(anon->an_lock));
		pg = anon->an_page;
		if (pg && (pg->pqflags & PQ_ANON) != 0 && anon->an_ref == 1) {
			if (pg->wire_count > 0) {
				UVMHIST_LOG(loanhist, "->A wired %#jx",
				    (uintptr_t)pg, 0, 0, 0);
				uvmfault_unlockall(ufi,
				    ufi->entry->aref.ar_amap,
				    ufi->entry->object.uvm_obj);
				return (-1);
			}
			pmap_page_protect(pg, VM_PROT_READ);
		}
		anon->an_ref++;
		**output = anon;
		(*output)++;
		UVMHIST_LOG(loanhist, "->A done", 0,0,0,0);
		return (1);
	}

	/*
	 * we are loaning to a kernel-page.   we need to get the page
	 * resident so we can wire it.   uvmfault_anonget will handle
	 * this for us.
	 */

	KASSERT(mutex_owned(anon->an_lock));
	error = uvmfault_anonget(ufi, ufi->entry->aref.ar_amap, anon);

	/*
	 * if we were unable to get the anon, then uvmfault_anonget has
	 * unlocked everything and returned an error code.
	 */

	if (error) {
		UVMHIST_LOG(loanhist, "error %jd", error,0,0,0);

		/* need to refault (i.e. refresh our lookup) ? */
		if (error == ERESTART) {
			return (0);
		}

		/* "try again"?   sleep a bit and retry ... */
		if (error == EAGAIN) {
			kpause("loanagain", false, hz/2, NULL);
			return (0);
		}

		/* otherwise flag it as an error */
		return (-1);
	}

	/*
	 * we have the page and its owner locked: do the loan now.
	 */

	pg = anon->an_page;
	mutex_enter(&uvm_pageqlock);
	if (pg->wire_count > 0) {
		mutex_exit(&uvm_pageqlock);
		UVMHIST_LOG(loanhist, "->K wired %#jx", (uintptr_t)pg, 0, 0, 0);
		KASSERT(pg->uobject == NULL);
		uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, NULL);
		return (-1);
	}
	if (pg->loan_count == 0) {
		pmap_page_protect(pg, VM_PROT_READ);
	}
	pg->loan_count++;
	KASSERT(pg->loan_count > 0);	/* detect wrap-around */
	uvm_pageactivate(pg);
	mutex_exit(&uvm_pageqlock);
	**output = pg;
	(*output)++;

	/* unlock and return success */
	if (pg->uobject)
		mutex_exit(pg->uobject->vmobjlock);
	UVMHIST_LOG(loanhist, "->K done", 0,0,0,0);
	return (1);
}

/*
 * uvm_loanpage: loan out pages to kernel (->K)
 *
 * => pages should be object-owned and the object should be locked.
 * => in the case of error, the object might be unlocked and relocked.
 * => caller should busy the pages beforehand.
 * => pages will be unbusied.
 * => fail with EBUSY if we meet a wired page.
 */
static int
uvm_loanpage(struct vm_page **pgpp, int npages)
{
	int i;
	int error = 0;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	for (i = 0; i < npages; i++) {
		struct vm_page *pg = pgpp[i];

		KASSERT(pg->uobject != NULL);
		KASSERT(pg->uobject == pgpp[0]->uobject);
		KASSERT(!(pg->flags & (PG_RELEASED|PG_PAGEOUT)));
		KASSERT(mutex_owned(pg->uobject->vmobjlock));
		KASSERT(pg->flags & PG_BUSY);

		mutex_enter(&uvm_pageqlock);
		if (pg->wire_count > 0) {
			mutex_exit(&uvm_pageqlock);
			UVMHIST_LOG(loanhist, "wired %#jx", (uintptr_t)pg,
			    0, 0, 0);
			error = EBUSY;
			break;
		}
		if (pg->loan_count == 0) {
			pmap_page_protect(pg, VM_PROT_READ);
		}
		pg->loan_count++;
		KASSERT(pg->loan_count > 0);	/* detect wrap-around */
		uvm_pageactivate(pg);
		mutex_exit(&uvm_pageqlock);
	}

	uvm_page_unbusy(pgpp, npages);

	if (error) {
		/*
		 * backout what we've done
		 */
		kmutex_t *slock = pgpp[0]->uobject->vmobjlock;

		mutex_exit(slock);
		uvm_unloan(pgpp, i, UVM_LOAN_TOPAGE);
		mutex_enter(slock);
	}

	UVMHIST_LOG(loanhist, "done %jd", error, 0, 0, 0);
	return error;
}

/*
 * XXX UBC temp limit
 * number of pages to get at once.
 * should be <= MAX_READ_AHEAD in genfs_vnops.c
 */
#define	UVM_LOAN_GET_CHUNK	16

/*
 * uvm_loanuobjpages: loan pages from a uobj out (O->K)
 *
 * => uobj shouldn't be locked.  (we'll lock it)
 * => fail with EBUSY if we meet a wired page.
 */
int
uvm_loanuobjpages(struct uvm_object *uobj, voff_t pgoff, int orignpages,
    struct vm_page **origpgpp)
{
	int ndone; /* # of pages loaned out */
	struct vm_page **pgpp;
	int error;
	int i;
	kmutex_t *slock;

	pgpp = origpgpp;
	for (ndone = 0; ndone < orignpages; ) {
		int npages;
		/* npendloan: # of pages busied but not loaned out yet. */
		int npendloan = 0xdead; /* XXX gcc */
reget:
		npages = MIN(UVM_LOAN_GET_CHUNK, orignpages - ndone);
		mutex_enter(uobj->vmobjlock);
		error = (*uobj->pgops->pgo_get)(uobj,
		    pgoff + (ndone << PAGE_SHIFT), pgpp, &npages, 0,
		    VM_PROT_READ, 0, PGO_SYNCIO);
		if (error == EAGAIN) {
			kpause("loanuopg", false, hz/2, NULL);
			continue;
		}
		if (error)
			goto fail;

		KASSERT(npages > 0);

		/* loan and unbusy pages */
		slock = NULL;
		for (i = 0; i < npages; i++) {
			kmutex_t *nextslock; /* slock for next page */
			struct vm_page *pg = *pgpp;

			/* XXX assuming that the page is owned by uobj */
			KASSERT(pg->uobject != NULL);
			nextslock = pg->uobject->vmobjlock;

			if (slock != nextslock) {
				if (slock) {
					KASSERT(npendloan > 0);
					error = uvm_loanpage(pgpp - npendloan,
					    npendloan);
					mutex_exit(slock);
					if (error)
						goto fail;
					ndone += npendloan;
					KASSERT(origpgpp + ndone == pgpp);
				}
				slock = nextslock;
				npendloan = 0;
				mutex_enter(slock);
			}

			if ((pg->flags & PG_RELEASED) != 0) {
				/*
				 * release pages and try again.
				 */
				mutex_exit(slock);
				for (; i < npages; i++) {
					pg = pgpp[i];
					slock = pg->uobject->vmobjlock;

					mutex_enter(slock);
					mutex_enter(&uvm_pageqlock);
					uvm_page_unbusy(&pg, 1);
					mutex_exit(&uvm_pageqlock);
					mutex_exit(slock);
				}
				goto reget;
			}

			npendloan++;
			pgpp++;
			KASSERT(origpgpp + ndone + npendloan == pgpp);
		}
		KASSERT(slock != NULL);
		KASSERT(npendloan > 0);
		error = uvm_loanpage(pgpp - npendloan, npendloan);
		mutex_exit(slock);
		if (error)
			goto fail;
		ndone += npendloan;
		KASSERT(origpgpp + ndone == pgpp);
	}

	return 0;

fail:
	uvm_unloan(origpgpp, ndone, UVM_LOAN_TOPAGE);

	return error;
}
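
/*
 * an illustrative sketch of a hypothetical caller of uvm_loanuobjpages():
 * loaning the pages backing a vnode range for zero-copy transmit.
 * "vp", "off" and "npages" are placeholders; "off" must be page-aligned.
 *
 *	struct vm_page *pgs[npages];
 *	int error;
 *
 *	error = uvm_loanuobjpages(&vp->v_uobj, off, npages, pgs);
 *	if (error == 0) {
 *		...hand the wired, read-only pages to the consumer...
 *		uvm_unloan(pgs, npages, UVM_LOAN_TOPAGE);
 *	}
 */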

/*
 * uvm_loanuobj: loan a page from a uobj out
 *
 * => called with map, amap, uobj locked
 * => return value:
 *	-1 = fatal error, everything is unlocked, abort.
 *	 0 = lookup in ufi went stale, everything unlocked, relookup and
 *		try again
 *	 1 = got it, everything still locked
 */

static int
uvm_loanuobj(struct uvm_faultinfo *ufi, void ***output, int flags, vaddr_t va)
{
	struct vm_amap *amap = ufi->entry->aref.ar_amap;
	struct uvm_object *uobj = ufi->entry->object.uvm_obj;
	struct vm_page *pg;
	int error, npages;
	bool locked;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	/*
	 * first we must make sure the page is resident.
	 *
	 * XXXCDC: duplicate code with uvm_fault().
	 */

	/* locked: maps(read), amap(if there) */
	mutex_enter(uobj->vmobjlock);
	/* locked: maps(read), amap(if there), uobj */

	if (uobj->pgops->pgo_get) {	/* try locked pgo_get */
		npages = 1;
		pg = NULL;
		error = (*uobj->pgops->pgo_get)(uobj,
		    va - ufi->entry->start + ufi->entry->offset,
		    &pg, &npages, 0, VM_PROT_READ, MADV_NORMAL, PGO_LOCKED);
	} else {
		error = EIO;		/* must have pgo_get op */
	}

	/*
	 * check the result of the locked pgo_get.  if there is a problem,
	 * then we fail the loan.
	 */

	if (error && error != EBUSY) {
		uvmfault_unlockall(ufi, amap, uobj);
		return (-1);
	}

	/*
	 * if we need to unlock for I/O, do so now.
	 */

	if (error == EBUSY) {
		uvmfault_unlockall(ufi, amap, NULL);

		/* locked: uobj */
		npages = 1;
		error = (*uobj->pgops->pgo_get)(uobj,
		    va - ufi->entry->start + ufi->entry->offset,
		    &pg, &npages, 0, VM_PROT_READ, MADV_NORMAL, PGO_SYNCIO);
		/* locked: <nothing> */

		if (error) {
			if (error == EAGAIN) {
				kpause("fltagain2", false, hz/2, NULL);
				return (0);
			}
			return (-1);
		}

		/*
		 * pgo_get was a success.   attempt to relock everything.
		 */

		locked = uvmfault_relock(ufi);
		if (locked && amap)
			amap_lock(amap);
		uobj = pg->uobject;
		mutex_enter(uobj->vmobjlock);

		/*
		 * verify that the page has not been released and re-verify
		 * that the amap slot is still free.   if there is a problem we
		 * drop our lock (thus forcing a lookup refresh/retry).
		 */

		if ((pg->flags & PG_RELEASED) != 0 ||
		    (locked && amap && amap_lookup(&ufi->entry->aref,
		    ufi->orig_rvaddr - ufi->entry->start))) {
			if (locked)
				uvmfault_unlockall(ufi, amap, NULL);
			locked = false;
		}

		/*
		 * didn't get the lock?   release the page and retry.
		 */

		if (locked == false) {
			if (pg->flags & PG_WANTED) {
				wakeup(pg);
			}
			if (pg->flags & PG_RELEASED) {
				mutex_enter(&uvm_pageqlock);
				uvm_pagefree(pg);
				mutex_exit(&uvm_pageqlock);
				mutex_exit(uobj->vmobjlock);
				return (0);
			}
			mutex_enter(&uvm_pageqlock);
			uvm_pageactivate(pg);
			mutex_exit(&uvm_pageqlock);
			pg->flags &= ~(PG_BUSY|PG_WANTED);
			UVM_PAGE_OWN(pg, NULL);
			mutex_exit(uobj->vmobjlock);
			return (0);
		}
	}

	KASSERT(uobj == pg->uobject);

	/*
736 	 * and we have all data structures locked.  do the loanout.  page can
737 	 * not be PG_RELEASED (we caught this above).
738 	 */
739 
740 	if ((flags & UVM_LOAN_TOANON) == 0) {
741 		if (uvm_loanpage(&pg, 1)) {
742 			uvmfault_unlockall(ufi, amap, uobj);
743 			return (-1);
744 		}
745 		mutex_exit(uobj->vmobjlock);
746 		**output = pg;
747 		(*output)++;
748 		return (1);
749 	}
750 
751 #ifdef notdef
752 	/*
753 	 * must be a loan to an anon.   check to see if there is already
754 	 * an anon associated with this page.  if so, then just return
755 	 * a reference to this object.   the page should already be
756 	 * mapped read-only because it is already on loan.
757 	 */
758 
759 	if (pg->uanon) {
760 		/* XXX: locking */
761 		anon = pg->uanon;
762 		anon->an_ref++;
763 		if (pg->flags & PG_WANTED) {
764 			wakeup(pg);
765 		}
766 		pg->flags &= ~(PG_WANTED|PG_BUSY);
767 		UVM_PAGE_OWN(pg, NULL);
768 		mutex_exit(uobj->vmobjlock);
769 		**output = anon;
770 		(*output)++;
771 		return (1);
772 	}
773 
774 	/*
775 	 * need to allocate a new anon
776 	 */
777 
778 	anon = uvm_analloc();
779 	if (anon == NULL) {
780 		goto fail;
781 	}
782 	mutex_enter(&uvm_pageqlock);
783 	if (pg->wire_count > 0) {
784 		mutex_exit(&uvm_pageqlock);
785 		UVMHIST_LOG(loanhist, "wired %#jx", (uintptr_t)pg, 0, 0, 0);
786 		goto fail;
787 	}
788 	if (pg->loan_count == 0) {
789 		pmap_page_protect(pg, VM_PROT_READ);
790 	}
791 	pg->loan_count++;
792 	KASSERT(pg->loan_count > 0);	/* detect wrap-around */
793 	pg->uanon = anon;
794 	anon->an_page = pg;
795 	anon->an_lock = /* TODO: share amap lock */
796 	uvm_pageactivate(pg);
797 	mutex_exit(&uvm_pageqlock);
798 	if (pg->flags & PG_WANTED) {
799 		wakeup(pg);
800 	}
801 	pg->flags &= ~(PG_WANTED|PG_BUSY);
802 	UVM_PAGE_OWN(pg, NULL);
803 	mutex_exit(uobj->vmobjlock);
804 	mutex_exit(&anon->an_lock);
805 	**output = anon;
806 	(*output)++;
807 	return (1);
808 
809 fail:
810 	UVMHIST_LOG(loanhist, "fail", 0,0,0,0);
811 	/*
812 	 * unlock everything and bail out.
813 	 */
814 	if (pg->flags & PG_WANTED) {
815 		wakeup(pg);
816 	}
817 	pg->flags &= ~(PG_WANTED|PG_BUSY);
818 	UVM_PAGE_OWN(pg, NULL);
819 	uvmfault_unlockall(ufi, amap, uobj, NULL);
820 	if (anon) {
821 		anon->an_ref--;
822 		uvm_anon_free(anon);
823 	}
824 #endif	/* notdef */
825 	return (-1);
826 }
827 
828 /*
829  * uvm_loanzero: loan a zero-fill page out
830  *
831  * => called with map, amap, uobj locked
832  * => return value:
833  *	-1 = fatal error, everything is unlocked, abort.
834  *	 0 = lookup in ufi went stale, everything unlocked, relookup and
835  *		try again
836  *	 1 = got it, everything still locked
837  */
838 
839 static struct uvm_object uvm_loanzero_object;
840 static kmutex_t uvm_loanzero_lock;
841 
842 static int
843 uvm_loanzero(struct uvm_faultinfo *ufi, void ***output, int flags)
844 {
845 	struct vm_page *pg;
846 	struct vm_amap *amap = ufi->entry->aref.ar_amap;
847 
848 	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);
849 again:
850 	mutex_enter(uvm_loanzero_object.vmobjlock);
851 
852 	/*
853 	 * first, get ahold of our single zero page.
854 	 */
855 
856 	if (__predict_false((pg =
857 			     TAILQ_FIRST(&uvm_loanzero_object.memq)) == NULL)) {
858 		while ((pg = uvm_pagealloc(&uvm_loanzero_object, 0, NULL,
859 					   UVM_PGA_ZERO)) == NULL) {
860 			mutex_exit(uvm_loanzero_object.vmobjlock);
861 			uvmfault_unlockall(ufi, amap, NULL);
862 			uvm_wait("loanzero");
863 			if (!uvmfault_relock(ufi)) {
864 				return (0);
865 			}
866 			if (amap) {
867 				amap_lock(amap);
868 			}
869 			goto again;
870 		}
871 
872 		/* got a zero'd page. */
873 		pg->flags &= ~(PG_WANTED|PG_BUSY|PG_FAKE);
874 		pg->flags |= PG_RDONLY;
875 		mutex_enter(&uvm_pageqlock);
876 		uvm_pageactivate(pg);
877 		mutex_exit(&uvm_pageqlock);
878 		UVM_PAGE_OWN(pg, NULL);
879 	}
880 
881 	if ((flags & UVM_LOAN_TOANON) == 0) {	/* loaning to kernel-page */
882 		mutex_enter(&uvm_pageqlock);
883 		pg->loan_count++;
884 		KASSERT(pg->loan_count > 0);	/* detect wrap-around */
885 		mutex_exit(&uvm_pageqlock);
886 		mutex_exit(uvm_loanzero_object.vmobjlock);
887 		**output = pg;
888 		(*output)++;
889 		return (1);
890 	}
891 
892 #ifdef notdef
893 	/*
894 	 * loaning to an anon.  check to see if there is already an anon
895 	 * associated with this page.  if so, then just return a reference
896 	 * to this object.
897 	 */
898 
899 	if (pg->uanon) {
900 		anon = pg->uanon;
901 		mutex_enter(&anon->an_lock);
902 		anon->an_ref++;
903 		mutex_exit(&anon->an_lock);
904 		mutex_exit(uvm_loanzero_object.vmobjlock);
905 		**output = anon;
906 		(*output)++;
907 		return (1);
908 	}
909 
910 	/*
911 	 * need to allocate a new anon
912 	 */
913 
914 	anon = uvm_analloc();
915 	if (anon == NULL) {
916 		/* out of swap causes us to fail */
917 		mutex_exit(uvm_loanzero_object.vmobjlock);
918 		uvmfault_unlockall(ufi, amap, NULL, NULL);
919 		return (-1);
920 	}
921 	anon->an_page = pg;
922 	pg->uanon = anon;
923 	mutex_enter(&uvm_pageqlock);
924 	pg->loan_count++;
925 	KASSERT(pg->loan_count > 0);	/* detect wrap-around */
926 	uvm_pageactivate(pg);
927 	mutex_exit(&uvm_pageqlock);
928 	mutex_exit(&anon->an_lock);
929 	mutex_exit(uvm_loanzero_object.vmobjlock);
930 	**output = anon;
931 	(*output)++;
932 	return (1);
933 #else
934 	return (-1);
935 #endif
936 }
937 
938 
939 /*
940  * uvm_unloananon: kill loans on anons (basically a normal ref drop)
941  *
942  * => we expect all our resources to be unlocked
943  */
944 
945 static void
946 uvm_unloananon(struct vm_anon **aloans, int nanons)
947 {
948 #ifdef notdef
949 	struct vm_anon *anon, *to_free = NULL;
950 
951 	/* TODO: locking */
952 	amap_lock(amap);
953 	while (nanons-- > 0) {
954 		anon = *aloans++;
955 		if (--anon->an_ref == 0) {
956 			anon->an_link = to_free;
957 			to_free = anon;
958 		}
959 	}
960 	uvm_anon_freelst(amap, to_free);
961 #endif	/* notdef */
962 }
963 
964 /*
965  * uvm_unloanpage: kill loans on pages loaned out to the kernel
966  *
967  * => we expect all our resources to be unlocked
968  */
969 
970 static void
971 uvm_unloanpage(struct vm_page **ploans, int npages)
972 {
973 	struct vm_page *pg;
974 	kmutex_t *slock;
975 
976 	mutex_enter(&uvm_pageqlock);
977 	while (npages-- > 0) {
978 		pg = *ploans++;
979 
980 		/*
981 		 * do a little dance to acquire the object or anon lock
982 		 * as appropriate.  we are locking in the wrong order,
983 		 * so we have to do a try-lock here.
984 		 */
985 
986 		slock = NULL;
987 		while (pg->uobject != NULL || pg->uanon != NULL) {
988 			if (pg->uobject != NULL) {
989 				slock = pg->uobject->vmobjlock;
990 			} else {
991 				slock = pg->uanon->an_lock;
992 			}
993 			if (mutex_tryenter(slock)) {
994 				break;
995 			}
996 			/* XXX Better than yielding but inadequate. */
997 			kpause("livelock", false, 1, &uvm_pageqlock);
998 			slock = NULL;
999 		}
1000 
1001 		/*
1002 		 * drop our loan.  if page is owned by an anon but
1003 		 * PQ_ANON is not set, the page was loaned to the anon
1004 		 * from an object which dropped ownership, so resolve
1005 		 * this by turning the anon's loan into real ownership
1006 		 * (ie. decrement loan_count again and set PQ_ANON).
1007 		 * after all this, if there are no loans left, put the
		 * page back on a paging queue (if the page is owned by
		 * an anon) or free it (if the page is now unowned).
		 */

		KASSERT(pg->loan_count > 0);
		pg->loan_count--;
		if (pg->uobject == NULL && pg->uanon != NULL &&
		    (pg->pqflags & PQ_ANON) == 0) {
			KASSERT(pg->loan_count > 0);
			pg->loan_count--;
			pg->pqflags |= PQ_ANON;
		}
		if (pg->loan_count == 0 && pg->uobject == NULL &&
		    pg->uanon == NULL) {
			KASSERT((pg->flags & PG_BUSY) == 0);
			uvm_pagefree(pg);
		}
		if (slock != NULL) {
			mutex_exit(slock);
		}
	}
	mutex_exit(&uvm_pageqlock);
}

/*
 * uvm_unloan: kill loans on pages or anons.
 */

void
uvm_unloan(void *v, int npages, int flags)
{
	if (flags & UVM_LOAN_TOANON) {
		uvm_unloananon(v, npages);
	} else {
		uvm_unloanpage(v, npages);
	}
}

/*
 * Minimal pager for uvm_loanzero_object.  We need to provide a "put"
 * method, because the page can end up on a paging queue, and the
 * page daemon will want to call pgo_put when it encounters the page
 * on the inactive list.
 */

static int
ulz_put(struct uvm_object *uobj, voff_t start, voff_t stop, int flags)
{
	struct vm_page *pg;

	KDASSERT(uobj == &uvm_loanzero_object);

	/*
	 * Don't need to do any work here if we're not freeing pages.
	 */

	if ((flags & PGO_FREE) == 0) {
		mutex_exit(uobj->vmobjlock);
		return 0;
	}

	/*
	 * we don't actually want to ever free the uvm_loanzero_page, so
	 * just reactivate or dequeue it.
	 */

	pg = TAILQ_FIRST(&uobj->memq);
	KASSERT(pg != NULL);
	KASSERT(TAILQ_NEXT(pg, listq.queue) == NULL);

	mutex_enter(&uvm_pageqlock);
	if (pg->uanon)
		uvm_pageactivate(pg);
	else
		uvm_pagedequeue(pg);
	mutex_exit(&uvm_pageqlock);

	mutex_exit(uobj->vmobjlock);
	return 0;
}

static const struct uvm_pagerops ulz_pager = {
	.pgo_put = ulz_put,
};

/*
 * uvm_loan_init(): initialize the uvm_loan() facility.
 */

void
uvm_loan_init(void)
{

	mutex_init(&uvm_loanzero_lock, MUTEX_DEFAULT, IPL_NONE);
	uvm_obj_init(&uvm_loanzero_object, &ulz_pager, false, 0);
	uvm_obj_setlock(&uvm_loanzero_object, &uvm_loanzero_lock);

	UVMHIST_INIT(loanhist, 300);
}

/*
 * uvm_loanbreak: break loan on a uobj page
 *
 * => called with uobj locked
 * => the page should be busy
 * => return value:
 *	newly allocated page if succeeded, NULL if the allocation failed
 */
struct vm_page *
uvm_loanbreak(struct vm_page *uobjpage)
{
	struct vm_page *pg;
	struct uvm_object *uobj __diagused = uobjpage->uobject;

	KASSERT(uobj != NULL);
	KASSERT(mutex_owned(uobj->vmobjlock));
	KASSERT(uobjpage->flags & PG_BUSY);

	/* alloc new un-owned page */
	pg = uvm_pagealloc(NULL, 0, NULL, 0);
	if (pg == NULL)
		return NULL;

	/*
	 * copy the data from the old page to the new
	 * one and clear the fake flags on the new page (keep it busy).
	 * force a reload of the old page by clearing it from all
	 * pmaps.
	 * transfer dirtiness of the old page to the new page.
	 * then lock the page queues to rename the pages.
	 */

	uvm_pagecopy(uobjpage, pg);	/* old -> new */
	pg->flags &= ~PG_FAKE;
	pmap_page_protect(uobjpage, VM_PROT_NONE);
	if ((uobjpage->flags & PG_CLEAN) != 0 && !pmap_clear_modify(uobjpage)) {
		pmap_clear_modify(pg);
		pg->flags |= PG_CLEAN;
	} else {
		/* uvm_pagecopy marked it dirty */
		KASSERT((pg->flags & PG_CLEAN) == 0);
		/* an object with a dirty page should be dirty. */
		KASSERT(!UVM_OBJ_IS_CLEAN(uobj));
	}
	if (uobjpage->flags & PG_WANTED)
		wakeup(uobjpage);
	/* uobj still locked */
	uobjpage->flags &= ~(PG_WANTED|PG_BUSY);
	UVM_PAGE_OWN(uobjpage, NULL);

	mutex_enter(&uvm_pageqlock);

	/*
	 * replace uobjpage with new page.
	 */

	uvm_pagereplace(uobjpage, pg);

	/*
	 * if the page is no longer referenced by
	 * an anon (i.e. we are breaking an O->K
	 * loan), then remove it from any pageq's.
	 */
	if (uobjpage->uanon == NULL)
		uvm_pagedequeue(uobjpage);

	/*
	 * at this point we have absolutely no
	 * control over uobjpage
	 */

	/* install new page */
	uvm_pageactivate(pg);
	mutex_exit(&uvm_pageqlock);

	/*
	 * done!  loan is broken and "pg" is
	 * PG_BUSY.   it can now replace uobjpage.
	 */

	return pg;
}
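
/*
 * an illustrative sketch of how a write-fault path might use
 * uvm_loanbreak() (hypothetical code modelled on the uvm_fault() usage,
 * not from this file).  the caller holds the object locked and
 * uobjpage busy:
 *
 *	if (uobjpage->loan_count != 0) {
 *		pg = uvm_loanbreak(uobjpage);
 *		if (pg == NULL) {
 *			...unlock everything, uvm_wait() for memory,
 *			   and restart the fault...
 *		}
 *		uobjpage = pg;	now a private, busy copy; safe to write
 *	}
 */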

int
uvm_loanbreak_anon(struct vm_anon *anon, struct uvm_object *uobj)
{
	struct vm_page *pg;

	KASSERT(mutex_owned(anon->an_lock));
	KASSERT(uobj == NULL || mutex_owned(uobj->vmobjlock));

	/* get new un-owned replacement page */
	pg = uvm_pagealloc(NULL, 0, NULL, 0);
	if (pg == NULL) {
		return ENOMEM;
	}

	/* copy old -> new */
	uvm_pagecopy(anon->an_page, pg);

	/* force reload */
	pmap_page_protect(anon->an_page, VM_PROT_NONE);
	mutex_enter(&uvm_pageqlock);	  /* KILL loan */

	anon->an_page->uanon = NULL;
	/* in case the anon owned the page */
	anon->an_page->pqflags &= ~PQ_ANON;

	if (uobj) {
		/* if we were receiver of loan */
		anon->an_page->loan_count--;
	} else {
		/*
		 * we were the lender (A->K); need to remove the page from
		 * pageq's.
		 */
		uvm_pagedequeue(anon->an_page);
	}

	if (uobj) {
		mutex_exit(uobj->vmobjlock);
	}

	/* install new page in anon */
	anon->an_page = pg;
	pg->uanon = anon;
	pg->pqflags |= PQ_ANON;

	uvm_pageactivate(pg);
	mutex_exit(&uvm_pageqlock);

	pg->flags &= ~(PG_BUSY|PG_FAKE);
	UVM_PAGE_OWN(pg, NULL);

	/* done! */

	return 0;
}
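
/*
 * an illustrative sketch of a hypothetical caller of uvm_loanbreak_anon():
 * breaking a loan before letting an anon page be written.  the caller
 * holds anon->an_lock (and uobj's lock, if uobj is not NULL):
 *
 *	if (anon->an_page->loan_count != 0) {
 *		error = uvm_loanbreak_anon(anon, uobj);
 *		if (error) {
 *			...unlock, wait for memory, then retry...
 *		}
 *		uobj = NULL;	uvm_loanbreak_anon() dropped uobj's lock
 *	}
 */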
1246