/*	$NetBSD: uvm_loan.c,v 1.85 2017/10/28 00:37:13 pgoyette Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * from: Id: uvm_loan.c,v 1.1.6.4 1998/02/06 05:08:43 chs Exp
 */

/*
 * uvm_loan.c: page loanout handler
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_loan.c,v 1.85 2017/10/28 00:37:13 pgoyette Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/mman.h>

#include <uvm/uvm.h>

#ifdef UVMHIST
UVMHIST_DEFINE(loanhist);
#endif
/*
 * "loaned" pages are pages which are (read-only, copy-on-write) loaned
 * from the VM system to other parts of the kernel.   this allows page
 * copying to be avoided (e.g. you can loan pages from objs/anons to
 * the mbuf system).
 *
 * there are 3 types of loans possible:
 *  O->K  uvm_object page to wired kernel page (e.g. mbuf data area)
 *  A->K  anon page to wired kernel page (e.g. mbuf data area)
 *  O->A  uvm_object to anon loan (e.g. vnode page to an anon)
 * note that it is possible to have an O page loaned to both an A and K
 * at the same time.
 *
 * loans are tracked by pg->loan_count.  an O->A page will have both
 * a uvm_object and a vm_anon, but PQ_ANON will not be set.   this sort
 * of page is considered "owned" by the uvm_object (not the anon).
 *
 * each loan of a page to the kernel bumps the pg->wire_count.  the
 * kernel mappings for these pages will be read-only and wired.  since
 * the page will also be wired, it will not be a candidate for pageout,
 * and thus will never be pmap_page_protect()'d with VM_PROT_NONE.  a
 * write fault in the kernel to one of these pages will not cause
 * copy-on-write.  instead, the page fault is considered fatal.  this
 * is because the kernel mapping will have no way to look up the
 * object/anon which the page is owned by.  this is a good side-effect,
 * since a kernel write to a loaned page is an error.
 *
 * owners that want to free their pages and discover that they are
 * loaned out simply "disown" them (the page becomes an orphan).  these
 * pages should be freed when the last loan is dropped.   in some cases
 * an anon may "adopt" an orphaned page.
 *
 * locking: to read pg->loan_count either the owner or the page queues
 * must be locked.   to modify pg->loan_count, both the owner of the page
 * and the PQs must be locked.   pg->flags is (as always) locked by
 * the owner of the page.
 *
 * note that locking from the "loaned" side is tricky since the object
 * getting the loaned page has no reference to the page's owner and thus
 * the owner could "die" at any time.   in order to prevent the owner
 * from dying the page queues should be locked.   this forces us to sometimes
 * use "try" locking.
 *
 * loans are typically broken by the following events:
 *  1. user-level write fault to a loaned page
 *  2. pageout of clean+inactive O->A loaned page
 *  3. owner frees page (e.g. pager flush)
 *
 * note that loaning a page causes all mappings of the page to become
 * read-only (via pmap_page_protect).   this could have an unexpected
 * effect on normal "wired" pages if one is not careful (XXX).
 */
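
/*
 * as a concrete illustration of the "->K" loans described above, a
 * hypothetical kernel consumer could borrow the pages backing a few
 * pages of user memory roughly as follows.  this is only a sketch:
 * the caller, buffer ("uva") and page count are made up, but uvm_loan(),
 * uvm_unloan() and UVM_LOAN_TOPAGE are the entry points implemented in
 * this file.
 *
 *	struct vm_page *pgs[4];
 *	vaddr_t va = trunc_page(uva);		// page-aligned user va
 *	int error;
 *
 *	error = uvm_loan(&curproc->p_vmspace->vm_map, va,
 *	    4 << PAGE_SHIFT, pgs, UVM_LOAN_TOPAGE);
 *	if (error)
 *		return error;	// uvm_loan() backs out partial loans itself
 *	// ... read the loaned (read-only) pages via pgs[] ...
 *	uvm_unloan(pgs, 4, UVM_LOAN_TOPAGE);
 */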

/*
 * local prototypes
 */

static int	uvm_loananon(struct uvm_faultinfo *, void ***,
			     int, struct vm_anon *);
static int	uvm_loanuobj(struct uvm_faultinfo *, void ***,
			     int, vaddr_t);
static int	uvm_loanzero(struct uvm_faultinfo *, void ***, int);
static void	uvm_unloananon(struct vm_anon **, int);
static void	uvm_unloanpage(struct vm_page **, int);
static int	uvm_loanpage(struct vm_page **, int);


/*
 * inlines
 */

/*
 * uvm_loanentry: loan out pages in a map entry (helper fn for uvm_loan())
 *
 * => "ufi" is the result of a successful map lookup (meaning that
 *	on entry the map is locked by the caller)
 * => we may unlock and then relock the map if needed (for I/O)
 * => we put our output result in "output"
 * => we always return with the map unlocked
 * => possible return values:
 *	-1 == error, map is unlocked
 *	 0 == map relock error (try again!), map is unlocked
 *	>0 == number of pages we loaned, map is unlocked
 *
 * NOTE: We can live with this being an inline, because it is only called
 * from one place.
 */

static inline int
uvm_loanentry(struct uvm_faultinfo *ufi, void ***output, int flags)
{
	vaddr_t curaddr = ufi->orig_rvaddr;
	vsize_t togo = ufi->size;
	struct vm_aref *aref = &ufi->entry->aref;
	struct uvm_object *uobj = ufi->entry->object.uvm_obj;
	struct vm_anon *anon;
	int rv, result = 0;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	/*
	 * lock us the rest of the way down (we unlock before return)
	 */
	if (aref->ar_amap) {
		amap_lock(aref->ar_amap);
	}

	/*
	 * loop until done
	 */
	while (togo) {

		/*
		 * find the page we want.   check the anon layer first.
		 */

		if (aref->ar_amap) {
			anon = amap_lookup(aref, curaddr - ufi->entry->start);
		} else {
			anon = NULL;
		}

		/* locked: map, amap, uobj */
		if (anon) {
			rv = uvm_loananon(ufi, output, flags, anon);
		} else if (uobj) {
			rv = uvm_loanuobj(ufi, output, flags, curaddr);
		} else if (UVM_ET_ISCOPYONWRITE(ufi->entry)) {
			rv = uvm_loanzero(ufi, output, flags);
		} else {
			uvmfault_unlockall(ufi, aref->ar_amap, uobj);
			rv = -1;
		}
		/* locked: if (rv > 0) => map, amap, uobj  [o.w. unlocked] */
		KASSERT(rv > 0 || aref->ar_amap == NULL ||
		    !mutex_owned(aref->ar_amap->am_lock));
		KASSERT(rv > 0 || uobj == NULL ||
		    !mutex_owned(uobj->vmobjlock));

		/* total failure */
		if (rv < 0) {
			UVMHIST_LOG(loanhist, "failure %jd", rv, 0,0,0);
			return (-1);
		}

		/* relock failed, need to do another lookup */
		if (rv == 0) {
			UVMHIST_LOG(loanhist, "relock failure %jd", result,
			    0,0,0);
			return (result);
		}

		/*
		 * got it... advance to next page
		 */

		result++;
		togo -= PAGE_SIZE;
		curaddr += PAGE_SIZE;
	}

	/*
	 * unlock what we locked, unlock the maps and return
	 */

	if (aref->ar_amap) {
		amap_unlock(aref->ar_amap);
	}
	uvmfault_unlockmaps(ufi, false);
	UVMHIST_LOG(loanhist, "done %jd", result, 0,0,0);
	return (result);
}

/*
 * normal functions
 */

/*
 * uvm_loan: loan pages in a map out to anons or to the kernel
 *
 * => map should be unlocked
 * => start and len should be multiples of PAGE_SIZE
 * => result is either an array of anons or vm_pages (depending on flags)
 * => flag values: UVM_LOAN_TOANON - loan to anons
 *                 UVM_LOAN_TOPAGE - loan to wired kernel page
 *    one and only one of these flags must be set!
 * => returns 0 (success), or an appropriate error number
 */

int
uvm_loan(struct vm_map *map, vaddr_t start, vsize_t len, void *v, int flags)
{
	struct uvm_faultinfo ufi;
	void **result, **output;
	int rv, error;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	/*
	 * ensure that one and only one of the flags is set
	 */

	KASSERT(((flags & UVM_LOAN_TOANON) == 0) ^
		((flags & UVM_LOAN_TOPAGE) == 0));

	/*
	 * "output" is a pointer to the current place to put the loaned page.
	 */

	result = v;
	output = &result[0];	/* start at the beginning ... */

	/*
	 * while we've got pages to do
	 */

	while (len > 0) {

		/*
		 * fill in params for a call to uvmfault_lookup
		 */

		ufi.orig_map = map;
		ufi.orig_rvaddr = start;
		ufi.orig_size = len;

		/*
		 * do the lookup, the only time this will fail is if we hit on
		 * an unmapped region (an error)
		 */

		if (!uvmfault_lookup(&ufi, false)) {
			error = ENOENT;
			goto fail;
		}

		/*
		 * map now locked.  now do the loanout...
		 */

		rv = uvm_loanentry(&ufi, &output, flags);
		if (rv < 0) {
			/* all unlocked due to error */
			error = EINVAL;
			goto fail;
		}

		/*
		 * done!  the map is unlocked.  advance, if possible.
		 *
		 * XXXCDC: could be recoded to hold the map lock with
		 *	   smarter code (but it only happens on map entry
		 *	   boundaries, so it isn't that bad).
		 */

		if (rv) {
			rv <<= PAGE_SHIFT;
			len -= rv;
			start += rv;
		}
	}
	UVMHIST_LOG(loanhist, "success", 0,0,0,0);
	return 0;

fail:
	/*
	 * failed to complete loans.  drop any loans and return failure code.
	 * map is already unlocked.
	 */

	if (output - result) {
		if (flags & UVM_LOAN_TOANON) {
			uvm_unloananon((struct vm_anon **)result,
			    output - result);
		} else {
			uvm_unloanpage((struct vm_page **)result,
			    output - result);
		}
	}
	UVMHIST_LOG(loanhist, "error %jd", error,0,0,0);
	return (error);
}

/*
 * uvm_loananon: loan a page from an anon out
 *
 * => called with map, amap, uobj locked
 * => return value:
 *	-1 = fatal error, everything is unlocked, abort.
 *	 0 = lookup in ufi went stale, everything unlocked, relookup and
 *		try again
 *	 1 = got it, everything still locked
 */

int
uvm_loananon(struct uvm_faultinfo *ufi, void ***output, int flags,
    struct vm_anon *anon)
{
	struct vm_page *pg;
	int error;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	/*
	 * if we are loaning to "another" anon then it is easy, we just
	 * bump the reference count on the current anon and return a
	 * pointer to it (it becomes copy-on-write shared).
	 */

	if (flags & UVM_LOAN_TOANON) {
		KASSERT(mutex_owned(anon->an_lock));
		pg = anon->an_page;
		if (pg && (pg->pqflags & PQ_ANON) != 0 && anon->an_ref == 1) {
			if (pg->wire_count > 0) {
				UVMHIST_LOG(loanhist, "->A wired %#jx",
				    (uintptr_t)pg, 0, 0, 0);
				uvmfault_unlockall(ufi,
				    ufi->entry->aref.ar_amap,
				    ufi->entry->object.uvm_obj);
				return (-1);
			}
			pmap_page_protect(pg, VM_PROT_READ);
		}
		anon->an_ref++;
		**output = anon;
		(*output)++;
		UVMHIST_LOG(loanhist, "->A done", 0,0,0,0);
		return (1);
	}

	/*
	 * we are loaning to a kernel-page.   we need to get the page
	 * resident so we can wire it.   uvmfault_anonget will handle
	 * this for us.
	 */

	KASSERT(mutex_owned(anon->an_lock));
	error = uvmfault_anonget(ufi, ufi->entry->aref.ar_amap, anon);

	/*
	 * if we were unable to get the anon, then uvmfault_anonget has
	 * unlocked everything and returned an error code.
	 */

	if (error) {
		UVMHIST_LOG(loanhist, "error %jd", error,0,0,0);

		/* need to refault (i.e. refresh our lookup) ? */
		if (error == ERESTART) {
			return (0);
		}

		/* "try again"?   sleep a bit and retry ... */
		if (error == EAGAIN) {
			kpause("loanagain", false, hz/2, NULL);
			return (0);
		}

		/* otherwise flag it as an error */
		return (-1);
	}

	/*
	 * we have the page and its owner locked: do the loan now.
	 */

	pg = anon->an_page;
	mutex_enter(&uvm_pageqlock);
	if (pg->wire_count > 0) {
		mutex_exit(&uvm_pageqlock);
		UVMHIST_LOG(loanhist, "->K wired %#jx", (uintptr_t)pg, 0, 0, 0);
		KASSERT(pg->uobject == NULL);
		uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, NULL);
		return (-1);
	}
	if (pg->loan_count == 0) {
		pmap_page_protect(pg, VM_PROT_READ);
	}
	pg->loan_count++;
	uvm_pageactivate(pg);
	mutex_exit(&uvm_pageqlock);
	**output = pg;
	(*output)++;

	/* unlock and return success */
	if (pg->uobject)
		mutex_exit(pg->uobject->vmobjlock);
	UVMHIST_LOG(loanhist, "->K done", 0,0,0,0);
	return (1);
}

/*
 * uvm_loanpage: loan out pages to kernel (->K)
 *
 * => pages should be object-owned and the object should be locked.
 * => in the case of error, the object might be unlocked and relocked.
 * => caller should busy the pages beforehand.
 * => pages will be unbusied.
 * => fail with EBUSY if we meet a wired page.
 */
static int
uvm_loanpage(struct vm_page **pgpp, int npages)
{
	int i;
	int error = 0;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	for (i = 0; i < npages; i++) {
		struct vm_page *pg = pgpp[i];

		KASSERT(pg->uobject != NULL);
		KASSERT(pg->uobject == pgpp[0]->uobject);
		KASSERT(!(pg->flags & (PG_RELEASED|PG_PAGEOUT)));
		KASSERT(mutex_owned(pg->uobject->vmobjlock));
		KASSERT(pg->flags & PG_BUSY);

		mutex_enter(&uvm_pageqlock);
		if (pg->wire_count > 0) {
			mutex_exit(&uvm_pageqlock);
			UVMHIST_LOG(loanhist, "wired %#jx", (uintptr_t)pg,
			    0, 0, 0);
			error = EBUSY;
			break;
		}
		if (pg->loan_count == 0) {
			pmap_page_protect(pg, VM_PROT_READ);
		}
		pg->loan_count++;
		uvm_pageactivate(pg);
		mutex_exit(&uvm_pageqlock);
	}

	uvm_page_unbusy(pgpp, npages);

	if (error) {
		/*
		 * backout what we've done
		 */
		kmutex_t *slock = pgpp[0]->uobject->vmobjlock;

		mutex_exit(slock);
		uvm_unloan(pgpp, i, UVM_LOAN_TOPAGE);
		mutex_enter(slock);
	}

	UVMHIST_LOG(loanhist, "done %jd", error, 0, 0, 0);
	return error;
}

/*
 * XXX UBC temp limit
 * number of pages to get at once.
 * should be <= MAX_READ_AHEAD in genfs_vnops.c
 */
#define	UVM_LOAN_GET_CHUNK	16

/*
 * uvm_loanuobjpages: loan pages from a uobj out (O->K)
 *
 * => uobj shouldn't be locked.  (we'll lock it)
 * => fail with EBUSY if we meet a wired page.
 */
int
uvm_loanuobjpages(struct uvm_object *uobj, voff_t pgoff, int orignpages,
    struct vm_page **origpgpp)
{
	int ndone; /* # of pages loaned out */
	struct vm_page **pgpp;
	int error;
	int i;
	kmutex_t *slock;

	pgpp = origpgpp;
	for (ndone = 0; ndone < orignpages; ) {
		int npages;
		/* npendloan: # of pages busied but not loaned out yet. */
		int npendloan = 0xdead; /* XXX gcc */
reget:
		npages = MIN(UVM_LOAN_GET_CHUNK, orignpages - ndone);
		mutex_enter(uobj->vmobjlock);
		error = (*uobj->pgops->pgo_get)(uobj,
		    pgoff + (ndone << PAGE_SHIFT), pgpp, &npages, 0,
		    VM_PROT_READ, 0, PGO_SYNCIO);
		if (error == EAGAIN) {
			kpause("loanuopg", false, hz/2, NULL);
			continue;
		}
		if (error)
			goto fail;

		KASSERT(npages > 0);

		/* loan and unbusy pages */
		slock = NULL;
		for (i = 0; i < npages; i++) {
			kmutex_t *nextslock; /* slock for next page */
			struct vm_page *pg = *pgpp;

			/* XXX assuming that the page is owned by uobj */
			KASSERT(pg->uobject != NULL);
			nextslock = pg->uobject->vmobjlock;

			if (slock != nextslock) {
				if (slock) {
					KASSERT(npendloan > 0);
					error = uvm_loanpage(pgpp - npendloan,
					    npendloan);
					mutex_exit(slock);
					if (error)
						goto fail;
					ndone += npendloan;
					KASSERT(origpgpp + ndone == pgpp);
				}
				slock = nextslock;
				npendloan = 0;
				mutex_enter(slock);
			}

			if ((pg->flags & PG_RELEASED) != 0) {
				/*
				 * release pages and try again.
				 */
				mutex_exit(slock);
				for (; i < npages; i++) {
					pg = pgpp[i];
					slock = pg->uobject->vmobjlock;

					mutex_enter(slock);
					mutex_enter(&uvm_pageqlock);
					uvm_page_unbusy(&pg, 1);
					mutex_exit(&uvm_pageqlock);
					mutex_exit(slock);
				}
				goto reget;
			}

			npendloan++;
			pgpp++;
			KASSERT(origpgpp + ndone + npendloan == pgpp);
		}
		KASSERT(slock != NULL);
		KASSERT(npendloan > 0);
		error = uvm_loanpage(pgpp - npendloan, npendloan);
		mutex_exit(slock);
		if (error)
			goto fail;
		ndone += npendloan;
		KASSERT(origpgpp + ndone == pgpp);
	}

	return 0;

fail:
	uvm_unloan(origpgpp, ndone, UVM_LOAN_TOPAGE);

	return error;
}
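
/*
 * similarly, the O->K path above can be driven directly when the caller
 * already has the backing uvm_object in hand.  a minimal sketch only
 * (the vnode "vp", offset "off" and page count are hypothetical; note
 * that the offset passed in must be page-aligned):
 *
 *	struct uvm_object *uobj = &vp->v_uobj;	// e.g. a vnode's object
 *	struct vm_page *pgs[8];
 *	int error;
 *
 *	error = uvm_loanuobjpages(uobj, trunc_page(off), 8, pgs);
 *	if (error)
 *		return error;	// partially loaned pages were backed out
 *	// ... use the loaned pages; their mappings are now read-only ...
 *	uvm_unloan(pgs, 8, UVM_LOAN_TOPAGE);
 */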

/*
 * uvm_loanuobj: loan a page from a uobj out
 *
 * => called with map, amap, uobj locked
 * => return value:
 *	-1 = fatal error, everything is unlocked, abort.
 *	 0 = lookup in ufi went stale, everything unlocked, relookup and
 *		try again
 *	 1 = got it, everything still locked
 */

static int
uvm_loanuobj(struct uvm_faultinfo *ufi, void ***output, int flags, vaddr_t va)
{
	struct vm_amap *amap = ufi->entry->aref.ar_amap;
	struct uvm_object *uobj = ufi->entry->object.uvm_obj;
	struct vm_page *pg;
	int error, npages;
	bool locked;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	/*
	 * first we must make sure the page is resident.
	 *
	 * XXXCDC: duplicate code with uvm_fault().
	 */

	/* locked: maps(read), amap(if there) */
	mutex_enter(uobj->vmobjlock);
	/* locked: maps(read), amap(if there), uobj */

	if (uobj->pgops->pgo_get) {	/* try locked pgo_get */
		npages = 1;
		pg = NULL;
		error = (*uobj->pgops->pgo_get)(uobj,
		    va - ufi->entry->start + ufi->entry->offset,
		    &pg, &npages, 0, VM_PROT_READ, MADV_NORMAL, PGO_LOCKED);
	} else {
		error = EIO;		/* must have pgo_get op */
	}

	/*
	 * check the result of the locked pgo_get.  if there is a problem,
	 * then we fail the loan.
	 */

	if (error && error != EBUSY) {
		uvmfault_unlockall(ufi, amap, uobj);
		return (-1);
	}

	/*
	 * if we need to unlock for I/O, do so now.
	 */

	if (error == EBUSY) {
		uvmfault_unlockall(ufi, amap, NULL);

		/* locked: uobj */
		npages = 1;
		error = (*uobj->pgops->pgo_get)(uobj,
		    va - ufi->entry->start + ufi->entry->offset,
		    &pg, &npages, 0, VM_PROT_READ, MADV_NORMAL, PGO_SYNCIO);
		/* locked: <nothing> */

		if (error) {
			if (error == EAGAIN) {
				kpause("fltagain2", false, hz/2, NULL);
				return (0);
			}
			return (-1);
		}

		/*
		 * pgo_get was a success.   attempt to relock everything.
		 */

		locked = uvmfault_relock(ufi);
		if (locked && amap)
			amap_lock(amap);
		uobj = pg->uobject;
		mutex_enter(uobj->vmobjlock);

		/*
		 * verify that the page has not been released and re-verify
		 * that the amap slot is still free.   if there is a problem
		 * we drop our lock (thus forcing a lookup refresh/retry).
		 */

		if ((pg->flags & PG_RELEASED) != 0 ||
		    (locked && amap && amap_lookup(&ufi->entry->aref,
		    ufi->orig_rvaddr - ufi->entry->start))) {
			if (locked)
				uvmfault_unlockall(ufi, amap, NULL);
			locked = false;
		}

		/*
		 * didn't get the lock?   release the page and retry.
		 */

		if (locked == false) {
			if (pg->flags & PG_WANTED) {
				wakeup(pg);
			}
			if (pg->flags & PG_RELEASED) {
				mutex_enter(&uvm_pageqlock);
				uvm_pagefree(pg);
				mutex_exit(&uvm_pageqlock);
				mutex_exit(uobj->vmobjlock);
				return (0);
			}
			mutex_enter(&uvm_pageqlock);
			uvm_pageactivate(pg);
			mutex_exit(&uvm_pageqlock);
			pg->flags &= ~(PG_BUSY|PG_WANTED);
			UVM_PAGE_OWN(pg, NULL);
			mutex_exit(uobj->vmobjlock);
			return (0);
		}
	}

	KASSERT(uobj == pg->uobject);

	/*
	 * at this point we have the page we want ("pg") marked PG_BUSY for us
	 * and we have all data structures locked.  do the loanout.  page can
	 * not be PG_RELEASED (we caught this above).
	 */

	if ((flags & UVM_LOAN_TOANON) == 0) {
		if (uvm_loanpage(&pg, 1)) {
			uvmfault_unlockall(ufi, amap, uobj);
			return (-1);
		}
		mutex_exit(uobj->vmobjlock);
		**output = pg;
		(*output)++;
		return (1);
	}

#ifdef notdef
	/*
	 * must be a loan to an anon.   check to see if there is already
	 * an anon associated with this page.  if so, then just return
	 * a reference to this object.   the page should already be
	 * mapped read-only because it is already on loan.
	 */

	if (pg->uanon) {
		/* XXX: locking */
		anon = pg->uanon;
		anon->an_ref++;
		if (pg->flags & PG_WANTED) {
			wakeup(pg);
		}
		pg->flags &= ~(PG_WANTED|PG_BUSY);
		UVM_PAGE_OWN(pg, NULL);
		mutex_exit(uobj->vmobjlock);
		**output = anon;
		(*output)++;
		return (1);
	}

	/*
	 * need to allocate a new anon
	 */

	anon = uvm_analloc();
	if (anon == NULL) {
		goto fail;
	}
	mutex_enter(&uvm_pageqlock);
	if (pg->wire_count > 0) {
		mutex_exit(&uvm_pageqlock);
		UVMHIST_LOG(loanhist, "wired %#jx", (uintptr_t)pg, 0, 0, 0);
		goto fail;
	}
	if (pg->loan_count == 0) {
		pmap_page_protect(pg, VM_PROT_READ);
	}
	pg->loan_count++;
	pg->uanon = anon;
	anon->an_page = pg;
	anon->an_lock = /* TODO: share amap lock */
	uvm_pageactivate(pg);
	mutex_exit(&uvm_pageqlock);
	if (pg->flags & PG_WANTED) {
		wakeup(pg);
	}
	pg->flags &= ~(PG_WANTED|PG_BUSY);
	UVM_PAGE_OWN(pg, NULL);
	mutex_exit(uobj->vmobjlock);
	mutex_exit(&anon->an_lock);
	**output = anon;
	(*output)++;
	return (1);

fail:
	UVMHIST_LOG(loanhist, "fail", 0,0,0,0);
	/*
	 * unlock everything and bail out.
	 */
	if (pg->flags & PG_WANTED) {
		wakeup(pg);
	}
	pg->flags &= ~(PG_WANTED|PG_BUSY);
	UVM_PAGE_OWN(pg, NULL);
	uvmfault_unlockall(ufi, amap, uobj, NULL);
	if (anon) {
		anon->an_ref--;
		uvm_anon_free(anon);
	}
#endif	/* notdef */
	return (-1);
}

/*
 * uvm_loanzero: loan a zero-fill page out
 *
 * => called with map, amap, uobj locked
 * => return value:
 *	-1 = fatal error, everything is unlocked, abort.
 *	 0 = lookup in ufi went stale, everything unlocked, relookup and
 *		try again
 *	 1 = got it, everything still locked
 */

static struct uvm_object uvm_loanzero_object;
static kmutex_t uvm_loanzero_lock;

static int
uvm_loanzero(struct uvm_faultinfo *ufi, void ***output, int flags)
{
	struct vm_page *pg;
	struct vm_amap *amap = ufi->entry->aref.ar_amap;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);
again:
	mutex_enter(uvm_loanzero_object.vmobjlock);

	/*
	 * first, get ahold of our single zero page.
	 */

	if (__predict_false((pg =
			     TAILQ_FIRST(&uvm_loanzero_object.memq)) == NULL)) {
		while ((pg = uvm_pagealloc(&uvm_loanzero_object, 0, NULL,
					   UVM_PGA_ZERO)) == NULL) {
			mutex_exit(uvm_loanzero_object.vmobjlock);
			uvmfault_unlockall(ufi, amap, NULL);
			uvm_wait("loanzero");
			if (!uvmfault_relock(ufi)) {
				return (0);
			}
			if (amap) {
				amap_lock(amap);
			}
			goto again;
		}

		/* got a zero'd page. */
		pg->flags &= ~(PG_WANTED|PG_BUSY|PG_FAKE);
		pg->flags |= PG_RDONLY;
		mutex_enter(&uvm_pageqlock);
		uvm_pageactivate(pg);
		mutex_exit(&uvm_pageqlock);
		UVM_PAGE_OWN(pg, NULL);
	}

	if ((flags & UVM_LOAN_TOANON) == 0) {	/* loaning to kernel-page */
		mutex_enter(&uvm_pageqlock);
		pg->loan_count++;
		mutex_exit(&uvm_pageqlock);
		mutex_exit(uvm_loanzero_object.vmobjlock);
		**output = pg;
		(*output)++;
		return (1);
	}

#ifdef notdef
	/*
	 * loaning to an anon.  check to see if there is already an anon
	 * associated with this page.  if so, then just return a reference
	 * to this object.
	 */

	if (pg->uanon) {
		anon = pg->uanon;
		mutex_enter(&anon->an_lock);
		anon->an_ref++;
		mutex_exit(&anon->an_lock);
		mutex_exit(uvm_loanzero_object.vmobjlock);
		**output = anon;
		(*output)++;
		return (1);
	}

	/*
	 * need to allocate a new anon
	 */

	anon = uvm_analloc();
	if (anon == NULL) {
		/* out of swap causes us to fail */
		mutex_exit(uvm_loanzero_object.vmobjlock);
		uvmfault_unlockall(ufi, amap, NULL, NULL);
		return (-1);
	}
	anon->an_page = pg;
	pg->uanon = anon;
	mutex_enter(&uvm_pageqlock);
	pg->loan_count++;
	uvm_pageactivate(pg);
	mutex_exit(&uvm_pageqlock);
	mutex_exit(&anon->an_lock);
	mutex_exit(uvm_loanzero_object.vmobjlock);
	**output = anon;
	(*output)++;
	return (1);
#else
	return (-1);
#endif
}


/*
 * uvm_unloananon: kill loans on anons (basically a normal ref drop)
 *
 * => we expect all our resources to be unlocked
 */

static void
uvm_unloananon(struct vm_anon **aloans, int nanons)
{
#ifdef notdef
	struct vm_anon *anon, *to_free = NULL;

	/* TODO: locking */
	amap_lock(amap);
	while (nanons-- > 0) {
		anon = *aloans++;
		if (--anon->an_ref == 0) {
			anon->an_link = to_free;
			to_free = anon;
		}
	}
	uvm_anon_freelst(amap, to_free);
#endif	/* notdef */
}

/*
 * uvm_unloanpage: kill loans on pages loaned out to the kernel
 *
 * => we expect all our resources to be unlocked
 */

static void
uvm_unloanpage(struct vm_page **ploans, int npages)
{
	struct vm_page *pg;
	kmutex_t *slock;

	mutex_enter(&uvm_pageqlock);
	while (npages-- > 0) {
		pg = *ploans++;

		/*
		 * do a little dance to acquire the object or anon lock
		 * as appropriate.  we are locking in the wrong order,
		 * so we have to do a try-lock here.
		 */

		slock = NULL;
		while (pg->uobject != NULL || pg->uanon != NULL) {
			if (pg->uobject != NULL) {
				slock = pg->uobject->vmobjlock;
			} else {
				slock = pg->uanon->an_lock;
			}
			if (mutex_tryenter(slock)) {
				break;
			}
			/* XXX Better than yielding but inadequate. */
			kpause("livelock", false, 1, &uvm_pageqlock);
			slock = NULL;
		}

		/*
		 * drop our loan.  if the page is owned by an anon but
		 * PQ_ANON is not set, the page was loaned to the anon
		 * from an object which dropped ownership, so resolve
		 * this by turning the anon's loan into real ownership
		 * (ie. decrement loan_count again and set PQ_ANON).
		 * after all this, if there are no loans left, put the
		 * page back on a paging queue (if the page is owned by
		 * an anon) or free it (if the page is now unowned).
		 */

		KASSERT(pg->loan_count > 0);
		pg->loan_count--;
		if (pg->uobject == NULL && pg->uanon != NULL &&
		    (pg->pqflags & PQ_ANON) == 0) {
			KASSERT(pg->loan_count > 0);
			pg->loan_count--;
			pg->pqflags |= PQ_ANON;
		}
		if (pg->loan_count == 0 && pg->uobject == NULL &&
		    pg->uanon == NULL) {
			KASSERT((pg->flags & PG_BUSY) == 0);
			uvm_pagefree(pg);
		}
		if (slock != NULL) {
			mutex_exit(slock);
		}
	}
	mutex_exit(&uvm_pageqlock);
}

/*
 * uvm_unloan: kill loans on pages or anons.
 */

void
uvm_unloan(void *v, int npages, int flags)
{
	if (flags & UVM_LOAN_TOANON) {
		uvm_unloananon(v, npages);
	} else {
		uvm_unloanpage(v, npages);
	}
}

/*
 * Minimal pager for uvm_loanzero_object.  We need to provide a "put"
 * method, because the page can end up on a paging queue, and the
 * page daemon will want to call pgo_put when it encounters the page
 * on the inactive list.
 */

static int
ulz_put(struct uvm_object *uobj, voff_t start, voff_t stop, int flags)
{
	struct vm_page *pg;

	KDASSERT(uobj == &uvm_loanzero_object);

	/*
	 * Don't need to do any work here if we're not freeing pages.
	 */

	if ((flags & PGO_FREE) == 0) {
		mutex_exit(uobj->vmobjlock);
		return 0;
	}

	/*
	 * we don't actually want to ever free the uvm_loanzero_page, so
	 * just reactivate or dequeue it.
	 */

	pg = TAILQ_FIRST(&uobj->memq);
	KASSERT(pg != NULL);
	KASSERT(TAILQ_NEXT(pg, listq.queue) == NULL);

	mutex_enter(&uvm_pageqlock);
	if (pg->uanon)
		uvm_pageactivate(pg);
	else
		uvm_pagedequeue(pg);
	mutex_exit(&uvm_pageqlock);

	mutex_exit(uobj->vmobjlock);
	return 0;
}

static const struct uvm_pagerops ulz_pager = {
	.pgo_put = ulz_put,
};

/*
 * uvm_loan_init(): initialize the uvm_loan() facility.
 */

void
uvm_loan_init(void)
{

	mutex_init(&uvm_loanzero_lock, MUTEX_DEFAULT, IPL_NONE);
	uvm_obj_init(&uvm_loanzero_object, &ulz_pager, false, 0);
	uvm_obj_setlock(&uvm_loanzero_object, &uvm_loanzero_lock);

	UVMHIST_INIT(loanhist, 300);
}

/*
 * uvm_loanbreak: break loan on a uobj page
 *
 * => called with uobj locked
 * => the page should be busy
 * => return value:
 *	newly allocated page if succeeded, NULL if the replacement
 *	page could not be allocated
 */
struct vm_page *
uvm_loanbreak(struct vm_page *uobjpage)
{
	struct vm_page *pg;
	struct uvm_object *uobj __diagused = uobjpage->uobject;

	KASSERT(uobj != NULL);
	KASSERT(mutex_owned(uobj->vmobjlock));
	KASSERT(uobjpage->flags & PG_BUSY);

	/* alloc new un-owned page */
	pg = uvm_pagealloc(NULL, 0, NULL, 0);
	if (pg == NULL)
		return NULL;

	/*
	 * copy the data from the old page to the new
	 * one and clear the fake flags on the new page (keep it busy).
	 * force a reload of the old page by clearing it from all
	 * pmaps.
	 * transfer dirtiness of the old page to the new page.
	 * then lock the page queues to rename the pages.
	 */

	uvm_pagecopy(uobjpage, pg);	/* old -> new */
	pg->flags &= ~PG_FAKE;
	pmap_page_protect(uobjpage, VM_PROT_NONE);
	if ((uobjpage->flags & PG_CLEAN) != 0 && !pmap_clear_modify(uobjpage)) {
		pmap_clear_modify(pg);
		pg->flags |= PG_CLEAN;
	} else {
		/* uvm_pagecopy marked it dirty */
		KASSERT((pg->flags & PG_CLEAN) == 0);
		/* an object with a dirty page should be dirty. */
		KASSERT(!UVM_OBJ_IS_CLEAN(uobj));
	}
	if (uobjpage->flags & PG_WANTED)
		wakeup(uobjpage);
	/* uobj still locked */
	uobjpage->flags &= ~(PG_WANTED|PG_BUSY);
	UVM_PAGE_OWN(uobjpage, NULL);

	mutex_enter(&uvm_pageqlock);

	/*
	 * replace uobjpage with new page.
	 */

	uvm_pagereplace(uobjpage, pg);

	/*
	 * if the page is no longer referenced by
	 * an anon (i.e. we are breaking an O->K
	 * loan), then remove it from any pageq's.
	 */
	if (uobjpage->uanon == NULL)
		uvm_pagedequeue(uobjpage);

	/*
	 * at this point we have absolutely no
	 * control over uobjpage
	 */

	/* install new page */
	uvm_pageactivate(pg);
	mutex_exit(&uvm_pageqlock);

	/*
	 * done!  loan is broken and "pg" is
	 * PG_BUSY.   it can now replace uobjpage.
	 */

	return pg;
}
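
/*
 * the expected calling pattern for uvm_loanbreak() is sketched below.
 * this is hedged: the retry logic is a paraphrase of what a fault-handler
 * style caller such as uvm_fault() would do, not a verbatim copy, and the
 * "loanbrk" wchan string is made up for illustration.
 *
 *	// uobj locked, uobjpage busy, uobjpage->loan_count > 0
 *	pg = uvm_loanbreak(uobjpage);
 *	if (pg == NULL) {
 *		// out of memory: unlock, wait for free pages, redo lookup
 *		mutex_exit(uobj->vmobjlock);
 *		uvm_wait("loanbrk");
 *		// ... restart the fault/lookup from scratch ...
 *	}
 *	// on success "pg" has replaced uobjpage in the object and is
 *	// still PG_BUSY; uobjpage is no longer ours to touch.
 */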

int
uvm_loanbreak_anon(struct vm_anon *anon, struct uvm_object *uobj)
{
	struct vm_page *pg;

	KASSERT(mutex_owned(anon->an_lock));
	KASSERT(uobj == NULL || mutex_owned(uobj->vmobjlock));

	/* get new un-owned replacement page */
	pg = uvm_pagealloc(NULL, 0, NULL, 0);
	if (pg == NULL) {
		return ENOMEM;
	}

	/* copy old -> new */
	uvm_pagecopy(anon->an_page, pg);

	/* force reload */
	pmap_page_protect(anon->an_page, VM_PROT_NONE);
	mutex_enter(&uvm_pageqlock);	  /* KILL loan */

	anon->an_page->uanon = NULL;
	/* in case we owned */
	anon->an_page->pqflags &= ~PQ_ANON;

	if (uobj) {
		/* if we were receiver of loan */
		anon->an_page->loan_count--;
	} else {
		/*
		 * we were the lender (A->K); need to remove the page from
		 * pageq's.
		 */
		uvm_pagedequeue(anon->an_page);
	}

	if (uobj) {
		mutex_exit(uobj->vmobjlock);
	}

	/* install new page in anon */
	anon->an_page = pg;
	pg->uanon = anon;
	pg->pqflags |= PQ_ANON;

	uvm_pageactivate(pg);
	mutex_exit(&uvm_pageqlock);

	pg->flags &= ~(PG_BUSY|PG_FAKE);
	UVM_PAGE_OWN(pg, NULL);

	/* done! */

	return 0;
}