/*	$NetBSD: uvm_loan.c,v 1.81 2011/08/06 17:25:03 rmind Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * from: Id: uvm_loan.c,v 1.1.6.4 1998/02/06 05:08:43 chs Exp
 */

/*
 * uvm_loan.c: page loanout handler
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_loan.c,v 1.81 2011/08/06 17:25:03 rmind Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/mman.h>

#include <uvm/uvm.h>

/*
 * "loaned" pages are pages which are (read-only, copy-on-write) loaned
 * from the VM system to other parts of the kernel.   this allows page
 * copying to be avoided (e.g. you can loan pages from objs/anons to
 * the mbuf system).
 *
 * there are 3 types of loans possible:
 *  O->K  uvm_object page to wired kernel page (e.g. mbuf data area)
 *  A->K  anon page to wired kernel page (e.g. mbuf data area)
 *  O->A  uvm_object to anon loan (e.g. vnode page to an anon)
 * note that it is possible to have an O page loaned to both an A and a K
 * at the same time.
 *
 * loans are tracked by pg->loan_count.  an O->A page will have both
 * a uvm_object and a vm_anon, but PQ_ANON will not be set.   this sort
 * of page is considered "owned" by the uvm_object (not the anon).
 *
 * each loan of a page to the kernel bumps the pg->wire_count.  the
 * kernel mappings for these pages will be read-only and wired.  since
 * the page will also be wired, it will not be a candidate for pageout,
 * and thus will never be pmap_page_protect()'d with VM_PROT_NONE.  a
 * write fault in the kernel to one of these pages will not cause
 * copy-on-write.  instead, the page fault is considered fatal.  this
 * is because the kernel mapping will have no way to look up the
 * object/anon which the page is owned by.  this is a good side-effect,
 * since a kernel write to a loaned page is an error.
 *
 * owners that want to free their pages and discover that they are
 * loaned out simply "disown" them (the page becomes an orphan).  these
 * pages should be freed when the last loan is dropped.   in some cases
 * an anon may "adopt" an orphaned page.
 *
 * locking: to read pg->loan_count either the owner or the page queues
 * must be locked.   to modify pg->loan_count, both the owner of the page
 * and the PQs must be locked.   pg->flags is (as always) locked by
 * the owner of the page.
 *
 * note that locking from the "loaned" side is tricky since the object
 * getting the loaned page has no reference to the page's owner and thus
 * the owner could "die" at any time.   in order to prevent the owner
 * from dying the page queues should be locked.   this forces us to sometimes
 * use "try" locking.
 *
 * loans are typically broken by the following events:
 *  1. user-level write fault to a loaned page
 *  2. pageout of clean+inactive O->A loaned page
 *  3. owner frees page (e.g. pager flush)
 *
 * note that loaning a page causes all mappings of the page to become
 * read-only (via pmap_page_protect).   this could have an unexpected
 * effect on normal "wired" pages if one is not careful (XXX).
 */
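
/*
 * example: a subsystem that wants zero-copy access to a range of user
 * memory might loan the backing pages into wired kernel pages (the
 * O->K / A->K cases) roughly as follows.  this is only an illustrative
 * sketch; "uva", "size" and MAXPAGES are caller-supplied placeholders.
 *
 *	struct vm_page *pgs[MAXPAGES];
 *	vaddr_t sva = trunc_page(uva);
 *	vsize_t len = round_page(uva + size) - sva;
 *	int error;
 *
 *	error = uvm_loan(&curproc->p_vmspace->vm_map, sva, len,
 *	    (void *)pgs, UVM_LOAN_TOPAGE);
 *	if (error)
 *		return error;
 *	... use the loaned pages (read-only, wired) ...
 *	uvm_unloan(pgs, len >> PAGE_SHIFT, UVM_LOAN_TOPAGE);
 */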

/*
 * local prototypes
 */

static int	uvm_loananon(struct uvm_faultinfo *, void ***,
			     int, struct vm_anon *);
static int	uvm_loanuobj(struct uvm_faultinfo *, void ***,
			     int, vaddr_t);
static int	uvm_loanzero(struct uvm_faultinfo *, void ***, int);
static void	uvm_unloananon(struct vm_anon **, int);
static void	uvm_unloanpage(struct vm_page **, int);
static int	uvm_loanpage(struct vm_page **, int);


/*
 * inlines
 */

/*
 * uvm_loanentry: loan out pages in a map entry (helper fn for uvm_loan())
 *
 * => "ufi" is the result of a successful map lookup (meaning that
 *	on entry the map is locked by the caller)
 * => we may unlock and then relock the map if needed (for I/O)
 * => we put our output result in "output"
 * => we always return with the map unlocked
 * => possible return values:
 *	-1 == error, map is unlocked
 *	 0 == map relock error (try again!), map is unlocked
 *	>0 == number of pages we loaned, map is unlocked
 *
 * NOTE: We can live with this being an inline, because it is only called
 * from one place.
 */

static inline int
uvm_loanentry(struct uvm_faultinfo *ufi, void ***output, int flags)
{
	vaddr_t curaddr = ufi->orig_rvaddr;
	vsize_t togo = ufi->size;
	struct vm_aref *aref = &ufi->entry->aref;
	struct uvm_object *uobj = ufi->entry->object.uvm_obj;
	struct vm_anon *anon;
	int rv, result = 0;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	/*
	 * lock us the rest of the way down (we unlock before return)
	 */
	if (aref->ar_amap) {
		amap_lock(aref->ar_amap);
	}

	/*
	 * loop until done
	 */
	while (togo) {

		/*
		 * find the page we want.   check the anon layer first.
		 */

		if (aref->ar_amap) {
			anon = amap_lookup(aref, curaddr - ufi->entry->start);
		} else {
			anon = NULL;
		}

		/* locked: map, amap, uobj */
		if (anon) {
			rv = uvm_loananon(ufi, output, flags, anon);
		} else if (uobj) {
			rv = uvm_loanuobj(ufi, output, flags, curaddr);
		} else if (UVM_ET_ISCOPYONWRITE(ufi->entry)) {
			rv = uvm_loanzero(ufi, output, flags);
		} else {
			uvmfault_unlockall(ufi, aref->ar_amap, uobj);
			rv = -1;
		}
		/* locked: if (rv > 0) => map, amap, uobj  [o.w. unlocked] */
		KASSERT(rv > 0 || aref->ar_amap == NULL ||
		    !mutex_owned(aref->ar_amap->am_lock));
		KASSERT(rv > 0 || uobj == NULL ||
		    !mutex_owned(uobj->vmobjlock));

		/* total failure */
		if (rv < 0) {
			UVMHIST_LOG(loanhist, "failure %d", rv, 0,0,0);
			return (-1);
		}

		/* relock failed, need to do another lookup */
		if (rv == 0) {
			UVMHIST_LOG(loanhist, "relock failure %d", result,
			    0,0,0);
			return (result);
		}

		/*
		 * got it... advance to next page
		 */

		result++;
		togo -= PAGE_SIZE;
		curaddr += PAGE_SIZE;
	}

	/*
	 * unlock what we locked, unlock the maps and return
	 */

	if (aref->ar_amap) {
		amap_unlock(aref->ar_amap);
	}
	uvmfault_unlockmaps(ufi, false);
	UVMHIST_LOG(loanhist, "done %d", result, 0,0,0);
	return (result);
}

/*
 * normal functions
 */

/*
 * uvm_loan: loan pages in a map out to anons or to the kernel
 *
 * => map should be unlocked
 * => start and len should be multiples of PAGE_SIZE
 * => result is either an array of anon's or vm_pages (depending on flags)
 * => flag values: UVM_LOAN_TOANON - loan to anons
 *                 UVM_LOAN_TOPAGE - loan to wired kernel page
 *    one and only one of these flags must be set!
 * => returns 0 (success), or an appropriate error number
 */

int
uvm_loan(struct vm_map *map, vaddr_t start, vsize_t len, void *v, int flags)
{
	struct uvm_faultinfo ufi;
	void **result, **output;
	int rv, error;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	/*
	 * ensure that one and only one of the flags is set
	 */

	KASSERT(((flags & UVM_LOAN_TOANON) == 0) ^
		((flags & UVM_LOAN_TOPAGE) == 0));
	KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);

	/*
	 * "output" is a pointer to the current place to put the loaned page.
	 */

	result = v;
	output = &result[0];	/* start at the beginning ... */

	/*
	 * while we've got pages to do
	 */

	while (len > 0) {

		/*
		 * fill in params for a call to uvmfault_lookup
		 */

		ufi.orig_map = map;
		ufi.orig_rvaddr = start;
		ufi.orig_size = len;

		/*
		 * do the lookup; the only time this will fail is if we hit
		 * an unmapped region (an error)
		 */
		if (!uvmfault_lookup(&ufi, false)) {
			error = ENOENT;
			goto fail;
		}

		/*
		 * map now locked.  now do the loanout...
		 */

		rv = uvm_loanentry(&ufi, &output, flags);
		if (rv < 0) {
			/* all unlocked due to error */
			error = EINVAL;
			goto fail;
		}

		/*
		 * done!  the map is unlocked.  advance, if possible.
		 *
		 * XXXCDC: could be recoded to hold the map lock with
		 *	   smarter code (but it only happens on map entry
		 *	   boundaries, so it isn't that bad).
		 */

		if (rv) {
			rv <<= PAGE_SHIFT;
			len -= rv;
			start += rv;
		}
	}
	UVMHIST_LOG(loanhist, "success", 0,0,0,0);
	return 0;

fail:
	/*
	 * failed to complete loans.  drop any loans and return failure code.
	 * map is already unlocked.
	 */

	if (output - result) {
		if (flags & UVM_LOAN_TOANON) {
			uvm_unloananon((struct vm_anon **)result,
			    output - result);
		} else {
			uvm_unloanpage((struct vm_page **)result,
			    output - result);
		}
	}
	UVMHIST_LOG(loanhist, "error %d", error,0,0,0);
	return (error);
}

/*
 * uvm_loananon: loan a page from an anon out
 *
 * => called with map, amap, uobj locked
 * => return value:
 *	-1 = fatal error, everything is unlocked, abort.
 *	 0 = lookup in ufi went stale, everything unlocked, relookup and
 *		try again
 *	 1 = got it, everything still locked
 */

int
uvm_loananon(struct uvm_faultinfo *ufi, void ***output, int flags,
    struct vm_anon *anon)
{
	struct vm_page *pg;
	int error;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	/*
	 * if we are loaning to "another" anon then it is easy, we just
	 * bump the reference count on the current anon and return a
	 * pointer to it (it becomes copy-on-write shared).
	 */

	if (flags & UVM_LOAN_TOANON) {
		KASSERT(mutex_owned(anon->an_lock));
		pg = anon->an_page;
		if (pg && (pg->pqflags & PQ_ANON) != 0 && anon->an_ref == 1) {
			if (pg->wire_count > 0) {
				UVMHIST_LOG(loanhist, "->A wired %p", pg,0,0,0);
				uvmfault_unlockall(ufi,
				    ufi->entry->aref.ar_amap,
				    ufi->entry->object.uvm_obj);
				return (-1);
			}
			pmap_page_protect(pg, VM_PROT_READ);
		}
		anon->an_ref++;
		**output = anon;
		(*output)++;
		UVMHIST_LOG(loanhist, "->A done", 0,0,0,0);
		return (1);
	}

	/*
	 * we are loaning to a kernel-page.   we need to get the page
	 * resident so we can wire it.   uvmfault_anonget will handle
	 * this for us.
	 */

	KASSERT(mutex_owned(anon->an_lock));
	error = uvmfault_anonget(ufi, ufi->entry->aref.ar_amap, anon);

	/*
	 * if we were unable to get the anon, then uvmfault_anonget has
	 * unlocked everything and returned an error code.
	 */

	if (error) {
		UVMHIST_LOG(loanhist, "error %d", error,0,0,0);

		/* need to refault (i.e. refresh our lookup)? */
		if (error == ERESTART) {
			return (0);
		}

		/* "try again"?   sleep a bit and retry ... */
		if (error == EAGAIN) {
			kpause("loanagain", false, hz/2, NULL);
			return (0);
		}

		/* otherwise flag it as an error */
		return (-1);
	}

	/*
	 * we have the page and its owner locked: do the loan now.
	 */

	pg = anon->an_page;
	mutex_enter(&uvm_pageqlock);
	if (pg->wire_count > 0) {
		mutex_exit(&uvm_pageqlock);
		UVMHIST_LOG(loanhist, "->K wired %p", pg,0,0,0);
		KASSERT(pg->uobject == NULL);
		uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, NULL);
		return (-1);
	}
	if (pg->loan_count == 0) {
		pmap_page_protect(pg, VM_PROT_READ);
	}
	pg->loan_count++;
	uvm_pageactivate(pg);
	mutex_exit(&uvm_pageqlock);
	**output = pg;
	(*output)++;

	/* unlock and return success */
	if (pg->uobject)
		mutex_exit(pg->uobject->vmobjlock);
	UVMHIST_LOG(loanhist, "->K done", 0,0,0,0);
	return (1);
}

/*
 * uvm_loanpage: loan out pages to kernel (->K)
 *
 * => pages should be object-owned and the object should be locked.
 * => in the case of error, the object might be unlocked and relocked.
 * => caller should busy the pages beforehand.
 * => pages will be unbusied.
 * => fail with EBUSY if we meet a wired page.
 */
static int
uvm_loanpage(struct vm_page **pgpp, int npages)
{
	int i;
	int error = 0;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	for (i = 0; i < npages; i++) {
		struct vm_page *pg = pgpp[i];

		KASSERT(pg->uobject != NULL);
		KASSERT(pg->uobject == pgpp[0]->uobject);
		KASSERT(!(pg->flags & (PG_RELEASED|PG_PAGEOUT)));
		KASSERT(mutex_owned(pg->uobject->vmobjlock));
		KASSERT(pg->flags & PG_BUSY);

		mutex_enter(&uvm_pageqlock);
		if (pg->wire_count > 0) {
			mutex_exit(&uvm_pageqlock);
			UVMHIST_LOG(loanhist, "wired %p", pg,0,0,0);
			error = EBUSY;
			break;
		}
		if (pg->loan_count == 0) {
			pmap_page_protect(pg, VM_PROT_READ);
		}
		pg->loan_count++;
		uvm_pageactivate(pg);
		mutex_exit(&uvm_pageqlock);
	}

	uvm_page_unbusy(pgpp, npages);

	if (error) {
		/*
		 * backout what we've done
		 */
		kmutex_t *slock = pgpp[0]->uobject->vmobjlock;

		mutex_exit(slock);
		uvm_unloan(pgpp, i, UVM_LOAN_TOPAGE);
		mutex_enter(slock);
	}

	UVMHIST_LOG(loanhist, "done %d", error,0,0,0);
	return error;
}

/*
 * XXX UBC temp limit
 * number of pages to get at once.
 * should be <= MAX_READ_AHEAD in genfs_vnops.c
 */
#define	UVM_LOAN_GET_CHUNK	16

/*
 * uvm_loanuobjpages: loan pages from a uobj out (O->K)
 *
 * => uobj shouldn't be locked.  (we'll lock it)
 * => fail with EBUSY if we meet a wired page.
 */
int
uvm_loanuobjpages(struct uvm_object *uobj, voff_t pgoff, int orignpages,
    struct vm_page **origpgpp)
{
	int ndone; /* # of pages loaned out */
	struct vm_page **pgpp;
	int error;
	int i;
	kmutex_t *slock;

	pgpp = origpgpp;
	for (ndone = 0; ndone < orignpages; ) {
		int npages;
		/* npendloan: # of pages busied but not loaned out yet. */
		int npendloan = 0xdead; /* XXX gcc */
reget:
		npages = MIN(UVM_LOAN_GET_CHUNK, orignpages - ndone);
		mutex_enter(uobj->vmobjlock);
		error = (*uobj->pgops->pgo_get)(uobj,
		    pgoff + (ndone << PAGE_SHIFT), pgpp, &npages, 0,
		    VM_PROT_READ, 0, PGO_SYNCIO);
		if (error == EAGAIN) {
			kpause("loanuopg", false, hz/2, NULL);
			continue;
		}
		if (error)
			goto fail;

		KASSERT(npages > 0);

		/* loan and unbusy pages */
		slock = NULL;
		for (i = 0; i < npages; i++) {
			kmutex_t *nextslock; /* slock for next page */
			struct vm_page *pg = *pgpp;

			/* XXX assuming that the page is owned by uobj */
			KASSERT(pg->uobject != NULL);
			nextslock = pg->uobject->vmobjlock;

			if (slock != nextslock) {
				if (slock) {
					KASSERT(npendloan > 0);
					error = uvm_loanpage(pgpp - npendloan,
					    npendloan);
					mutex_exit(slock);
					if (error)
						goto fail;
					ndone += npendloan;
					KASSERT(origpgpp + ndone == pgpp);
				}
				slock = nextslock;
				npendloan = 0;
				mutex_enter(slock);
			}

			if ((pg->flags & PG_RELEASED) != 0) {
				/*
				 * release pages and try again.
				 */
				mutex_exit(slock);
				for (; i < npages; i++) {
					pg = pgpp[i];
					slock = pg->uobject->vmobjlock;

					mutex_enter(slock);
					mutex_enter(&uvm_pageqlock);
					uvm_page_unbusy(&pg, 1);
					mutex_exit(&uvm_pageqlock);
					mutex_exit(slock);
				}
				goto reget;
			}

			npendloan++;
			pgpp++;
			KASSERT(origpgpp + ndone + npendloan == pgpp);
		}
		KASSERT(slock != NULL);
		KASSERT(npendloan > 0);
		error = uvm_loanpage(pgpp - npendloan, npendloan);
		mutex_exit(slock);
		if (error)
			goto fail;
		ndone += npendloan;
		KASSERT(origpgpp + ndone == pgpp);
	}

	return 0;

fail:
	uvm_unloan(origpgpp, ndone, UVM_LOAN_TOPAGE);

	return error;
}

/*
 * uvm_loanuobj: loan a page from a uobj out
 *
 * => called with map, amap, uobj locked
 * => return value:
 *	-1 = fatal error, everything is unlocked, abort.
 *	 0 = lookup in ufi went stale, everything unlocked, relookup and
 *		try again
 *	 1 = got it, everything still locked
 */

static int
uvm_loanuobj(struct uvm_faultinfo *ufi, void ***output, int flags, vaddr_t va)
{
	struct vm_amap *amap = ufi->entry->aref.ar_amap;
	struct uvm_object *uobj = ufi->entry->object.uvm_obj;
	struct vm_page *pg;
	int error, npages;
	bool locked;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	/*
	 * first we must make sure the page is resident.
	 *
	 * XXXCDC: duplicate code with uvm_fault().
	 */

	/* locked: maps(read), amap(if there) */
	mutex_enter(uobj->vmobjlock);
	/* locked: maps(read), amap(if there), uobj */

	if (uobj->pgops->pgo_get) {	/* try locked pgo_get */
		npages = 1;
		pg = NULL;
		error = (*uobj->pgops->pgo_get)(uobj,
		    va - ufi->entry->start + ufi->entry->offset,
		    &pg, &npages, 0, VM_PROT_READ, MADV_NORMAL, PGO_LOCKED);
	} else {
		error = EIO;		/* must have pgo_get op */
	}

	/*
	 * check the result of the locked pgo_get.  if there is a problem,
	 * then we fail the loan.
	 */

	if (error && error != EBUSY) {
		uvmfault_unlockall(ufi, amap, uobj);
		return (-1);
	}

	/*
	 * if we need to unlock for I/O, do so now.
	 */

	if (error == EBUSY) {
		uvmfault_unlockall(ufi, amap, NULL);

		/* locked: uobj */
		npages = 1;
		error = (*uobj->pgops->pgo_get)(uobj,
		    va - ufi->entry->start + ufi->entry->offset,
		    &pg, &npages, 0, VM_PROT_READ, MADV_NORMAL, PGO_SYNCIO);
		/* locked: <nothing> */

		if (error) {
			if (error == EAGAIN) {
				kpause("fltagain2", false, hz/2, NULL);
				return (0);
			}
			return (-1);
		}

		/*
		 * pgo_get was a success.   attempt to relock everything.
		 */

		locked = uvmfault_relock(ufi);
		if (locked && amap)
			amap_lock(amap);
		uobj = pg->uobject;
		mutex_enter(uobj->vmobjlock);

		/*
		 * verify that the page has not been released and re-verify
		 * that the amap slot is still free.   if there is a problem
		 * we drop our lock (thus forcing a lookup refresh/retry).
		 */
		if ((pg->flags & PG_RELEASED) != 0 ||
		    (locked && amap && amap_lookup(&ufi->entry->aref,
		    ufi->orig_rvaddr - ufi->entry->start))) {
			if (locked)
				uvmfault_unlockall(ufi, amap, NULL);
			locked = false;
		}

		/*
		 * didn't get the lock?   release the page and retry.
		 */

		if (locked == false) {
			if (pg->flags & PG_WANTED) {
				wakeup(pg);
			}
			if (pg->flags & PG_RELEASED) {
				mutex_enter(&uvm_pageqlock);
				uvm_pagefree(pg);
				mutex_exit(&uvm_pageqlock);
				mutex_exit(uobj->vmobjlock);
				return (0);
			}
			mutex_enter(&uvm_pageqlock);
			uvm_pageactivate(pg);
			mutex_exit(&uvm_pageqlock);
			pg->flags &= ~(PG_BUSY|PG_WANTED);
			UVM_PAGE_OWN(pg, NULL);
			mutex_exit(uobj->vmobjlock);
			return (0);
		}
	}

	KASSERT(uobj == pg->uobject);

	/*
	 * at this point we have the page we want ("pg") marked PG_BUSY for us
	 * and we have all data structures locked.  do the loanout.  page can
	 * not be PG_RELEASED (we caught this above).
	 */

	if ((flags & UVM_LOAN_TOANON) == 0) {
		if (uvm_loanpage(&pg, 1)) {
			uvmfault_unlockall(ufi, amap, uobj);
			return (-1);
		}
		mutex_exit(uobj->vmobjlock);
		**output = pg;
		(*output)++;
		return (1);
	}

#ifdef notdef
	/*
	 * must be a loan to an anon.   check to see if there is already
	 * an anon associated with this page.  if so, then just return
	 * a reference to this object.   the page should already be
	 * mapped read-only because it is already on loan.
	 */

	if (pg->uanon) {
		/* XXX: locking */
		anon = pg->uanon;
		anon->an_ref++;
		if (pg->flags & PG_WANTED) {
			wakeup(pg);
		}
		pg->flags &= ~(PG_WANTED|PG_BUSY);
		UVM_PAGE_OWN(pg, NULL);
		mutex_exit(uobj->vmobjlock);
		**output = anon;
		(*output)++;
		return (1);
	}

	/*
	 * need to allocate a new anon
	 */

	anon = uvm_analloc();
	if (anon == NULL) {
		goto fail;
	}
	mutex_enter(&uvm_pageqlock);
	if (pg->wire_count > 0) {
		mutex_exit(&uvm_pageqlock);
		UVMHIST_LOG(loanhist, "wired %p", pg,0,0,0);
		goto fail;
	}
	if (pg->loan_count == 0) {
		pmap_page_protect(pg, VM_PROT_READ);
	}
	pg->loan_count++;
	pg->uanon = anon;
	anon->an_page = pg;
	anon->an_lock = NULL;	/* TODO: share amap lock */
	uvm_pageactivate(pg);
	mutex_exit(&uvm_pageqlock);
	if (pg->flags & PG_WANTED) {
		wakeup(pg);
	}
	pg->flags &= ~(PG_WANTED|PG_BUSY);
	UVM_PAGE_OWN(pg, NULL);
	mutex_exit(uobj->vmobjlock);
	mutex_exit(&anon->an_lock);
	**output = anon;
	(*output)++;
	return (1);

fail:
	UVMHIST_LOG(loanhist, "fail", 0,0,0,0);
	/*
	 * unlock everything and bail out.
	 */
	if (pg->flags & PG_WANTED) {
		wakeup(pg);
	}
	pg->flags &= ~(PG_WANTED|PG_BUSY);
	UVM_PAGE_OWN(pg, NULL);
	uvmfault_unlockall(ufi, amap, uobj, NULL);
	if (anon) {
		anon->an_ref--;
		uvm_anon_free(anon);
	}
#endif	/* notdef */
	return (-1);
}

/*
 * uvm_loanzero: loan a zero-fill page out
 *
 * => called with map, amap, uobj locked
 * => return value:
 *	-1 = fatal error, everything is unlocked, abort.
 *	 0 = lookup in ufi went stale, everything unlocked, relookup and
 *		try again
 *	 1 = got it, everything still locked
 */

static struct uvm_object uvm_loanzero_object;
static kmutex_t uvm_loanzero_lock;

static int
uvm_loanzero(struct uvm_faultinfo *ufi, void ***output, int flags)
{
	struct vm_page *pg;
	struct vm_amap *amap = ufi->entry->aref.ar_amap;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);
again:
	mutex_enter(uvm_loanzero_object.vmobjlock);

	/*
	 * first, get ahold of our single zero page.
	 */

	if (__predict_false((pg =
			     TAILQ_FIRST(&uvm_loanzero_object.memq)) == NULL)) {
		while ((pg = uvm_pagealloc(&uvm_loanzero_object, 0, NULL,
					   UVM_PGA_ZERO)) == NULL) {
			mutex_exit(uvm_loanzero_object.vmobjlock);
			uvmfault_unlockall(ufi, amap, NULL);
			uvm_wait("loanzero");
			if (!uvmfault_relock(ufi)) {
				return (0);
			}
			if (amap) {
				amap_lock(amap);
			}
			goto again;
		}

		/* got a zero'd page. */
		pg->flags &= ~(PG_WANTED|PG_BUSY|PG_FAKE);
		pg->flags |= PG_RDONLY;
		mutex_enter(&uvm_pageqlock);
		uvm_pageactivate(pg);
		mutex_exit(&uvm_pageqlock);
		UVM_PAGE_OWN(pg, NULL);
	}

	if ((flags & UVM_LOAN_TOANON) == 0) {	/* loaning to kernel-page */
		mutex_enter(&uvm_pageqlock);
		pg->loan_count++;
		mutex_exit(&uvm_pageqlock);
		mutex_exit(uvm_loanzero_object.vmobjlock);
		**output = pg;
		(*output)++;
		return (1);
	}

#ifdef notdef
	/*
	 * loaning to an anon.  check to see if there is already an anon
	 * associated with this page.  if so, then just return a reference
	 * to this object.
	 */

	if (pg->uanon) {
		anon = pg->uanon;
		mutex_enter(&anon->an_lock);
		anon->an_ref++;
		mutex_exit(&anon->an_lock);
		mutex_exit(uvm_loanzero_object.vmobjlock);
		**output = anon;
		(*output)++;
		return (1);
	}

	/*
	 * need to allocate a new anon
	 */

	anon = uvm_analloc();
	if (anon == NULL) {
		/* out of swap causes us to fail */
		mutex_exit(uvm_loanzero_object.vmobjlock);
		uvmfault_unlockall(ufi, amap, NULL, NULL);
		return (-1);
	}
	anon->an_page = pg;
	pg->uanon = anon;
	mutex_enter(&uvm_pageqlock);
	pg->loan_count++;
	uvm_pageactivate(pg);
	mutex_exit(&uvm_pageqlock);
	mutex_exit(&anon->an_lock);
	mutex_exit(uvm_loanzero_object.vmobjlock);
	**output = anon;
	(*output)++;
	return (1);
#else
	return (-1);
#endif
}


/*
 * uvm_unloananon: kill loans on anons (basically a normal ref drop)
 *
 * => we expect all our resources to be unlocked
 */

static void
uvm_unloananon(struct vm_anon **aloans, int nanons)
{
#ifdef notdef
	struct vm_anon *anon, *to_free = NULL;

	/* TODO: locking */
	amap_lock(amap);
	while (nanons-- > 0) {
		anon = *aloans++;
		if (--anon->an_ref == 0) {
			anon->an_link = to_free;
			to_free = anon;
		}
	}
	uvm_anon_freelst(amap, to_free);
#endif	/* notdef */
}

/*
 * uvm_unloanpage: kill loans on pages loaned out to the kernel
 *
 * => we expect all our resources to be unlocked
 */

static void
uvm_unloanpage(struct vm_page **ploans, int npages)
{
	struct vm_page *pg;
	kmutex_t *slock;

	mutex_enter(&uvm_pageqlock);
	while (npages-- > 0) {
		pg = *ploans++;

		/*
		 * do a little dance to acquire the object or anon lock
		 * as appropriate.  we are locking in the wrong order,
		 * so we have to do a try-lock here.
		 */

		slock = NULL;
		while (pg->uobject != NULL || pg->uanon != NULL) {
			if (pg->uobject != NULL) {
				slock = pg->uobject->vmobjlock;
			} else {
				slock = pg->uanon->an_lock;
			}
			if (mutex_tryenter(slock)) {
				break;
			}
			/* XXX Better than yielding but inadequate. */
			kpause("livelock", false, 1, &uvm_pageqlock);
			slock = NULL;
		}

		/*
		 * drop our loan.  if page is owned by an anon but
		 * PQ_ANON is not set, the page was loaned to the anon
		 * from an object which dropped ownership, so resolve
		 * this by turning the anon's loan into real ownership
		 * (ie. decrement loan_count again and set PQ_ANON).
		 * after all this, if there are no loans left, put the
		 * page back on a paging queue (if the page is owned by
		 * an anon) or free it (if the page is now unowned).
		 */

		KASSERT(pg->loan_count > 0);
		pg->loan_count--;
		if (pg->uobject == NULL && pg->uanon != NULL &&
		    (pg->pqflags & PQ_ANON) == 0) {
			KASSERT(pg->loan_count > 0);
			pg->loan_count--;
			pg->pqflags |= PQ_ANON;
		}
		if (pg->loan_count == 0 && pg->uobject == NULL &&
		    pg->uanon == NULL) {
			KASSERT((pg->flags & PG_BUSY) == 0);
			uvm_pagefree(pg);
		}
		if (slock != NULL) {
			mutex_exit(slock);
		}
	}
	mutex_exit(&uvm_pageqlock);
}

/*
 * uvm_unloan: kill loans on pages or anons.
 */

void
uvm_unloan(void *v, int npages, int flags)
{
	if (flags & UVM_LOAN_TOANON) {
		uvm_unloananon(v, npages);
	} else {
		uvm_unloanpage(v, npages);
	}
}

/*
 * Minimal pager for uvm_loanzero_object.  We need to provide a "put"
 * method, because the page can end up on a paging queue, and the
 * page daemon will want to call pgo_put when it encounters the page
 * on the inactive list.
 */

static int
ulz_put(struct uvm_object *uobj, voff_t start, voff_t stop, int flags)
{
	struct vm_page *pg;

	KDASSERT(uobj == &uvm_loanzero_object);

	/*
	 * Don't need to do any work here if we're not freeing pages.
	 */

	if ((flags & PGO_FREE) == 0) {
		mutex_exit(uobj->vmobjlock);
		return 0;
	}

	/*
	 * we don't actually want to ever free the uvm_loanzero_page, so
	 * just reactivate or dequeue it.
	 */

	pg = TAILQ_FIRST(&uobj->memq);
	KASSERT(pg != NULL);
	KASSERT(TAILQ_NEXT(pg, listq.queue) == NULL);

	mutex_enter(&uvm_pageqlock);
	if (pg->uanon)
		uvm_pageactivate(pg);
	else
		uvm_pagedequeue(pg);
	mutex_exit(&uvm_pageqlock);

	mutex_exit(uobj->vmobjlock);
	return 0;
}

static const struct uvm_pagerops ulz_pager = {
	.pgo_put = ulz_put,
};

/*
 * uvm_loan_init(): initialize the uvm_loan() facility.
 */

void
uvm_loan_init(void)
{

	mutex_init(&uvm_loanzero_lock, MUTEX_DEFAULT, IPL_NONE);
	uvm_obj_init(&uvm_loanzero_object, &ulz_pager, false, 0);
	uvm_obj_setlock(&uvm_loanzero_object, &uvm_loanzero_lock);

	UVMHIST_INIT(loanhist, 300);
}

/*
 * uvm_loanbreak: break loan on a uobj page
 *
 * => called with uobj locked
 * => the page should be busy
 * => return value:
 *	newly allocated page on success, NULL on failure (out of memory)
 */
struct vm_page *
uvm_loanbreak(struct vm_page *uobjpage)
{
	struct vm_page *pg;
#ifdef DIAGNOSTIC
	struct uvm_object *uobj = uobjpage->uobject;
#endif

	KASSERT(uobj != NULL);
	KASSERT(mutex_owned(uobj->vmobjlock));
	KASSERT(uobjpage->flags & PG_BUSY);

	/* alloc new un-owned page */
	pg = uvm_pagealloc(NULL, 0, NULL, 0);
	if (pg == NULL)
		return NULL;

	/*
	 * copy the data from the old page to the new
	 * one and clear the fake flags on the new page (keep it busy).
	 * force a reload of the old page by clearing it from all
	 * pmaps.
	 * transfer dirtiness of the old page to the new page.
	 * then lock the page queues to rename the pages.
	 */

	uvm_pagecopy(uobjpage, pg);	/* old -> new */
	pg->flags &= ~PG_FAKE;
	pmap_page_protect(uobjpage, VM_PROT_NONE);
	if ((uobjpage->flags & PG_CLEAN) != 0 && !pmap_clear_modify(uobjpage)) {
		pmap_clear_modify(pg);
		pg->flags |= PG_CLEAN;
	} else {
		/* uvm_pagecopy marked it dirty */
		KASSERT((pg->flags & PG_CLEAN) == 0);
		/* an object with a dirty page should be dirty. */
		KASSERT(!UVM_OBJ_IS_CLEAN(uobj));
	}
	if (uobjpage->flags & PG_WANTED)
		wakeup(uobjpage);
	/* uobj still locked */
	uobjpage->flags &= ~(PG_WANTED|PG_BUSY);
	UVM_PAGE_OWN(uobjpage, NULL);

	mutex_enter(&uvm_pageqlock);

	/*
	 * replace uobjpage with new page.
	 */

	uvm_pagereplace(uobjpage, pg);

	/*
	 * if the page is no longer referenced by
	 * an anon (i.e. we are breaking an O->K
	 * loan), then remove it from any pageq's.
	 */
	if (uobjpage->uanon == NULL)
		uvm_pagedequeue(uobjpage);

	/*
	 * at this point we have absolutely no
	 * control over uobjpage
	 */

	/* install new page */
	uvm_pageactivate(pg);
	mutex_exit(&uvm_pageqlock);

	/*
	 * done!  loan is broken and "pg" is
	 * PG_BUSY.   it can now replace uobjpage.
	 */

	return pg;
}
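
/*
 * for example, a write-fault handler that finds a loaned uobj page must
 * break the loan before letting the write proceed.  a minimal sketch of
 * such a caller (assumed context: uobj locked, "uobjpage" is PG_BUSY,
 * "ufi" and "amap" are the caller's fault state; the wmesg string is
 * arbitrary):
 *
 *	if (uobjpage->loan_count != 0) {
 *		pg = uvm_loanbreak(uobjpage);
 *		if (pg == NULL) {
 *			uvmfault_unlockall(ufi, amap, uobj);
 *			uvm_wait("loanbrk");
 *			return ERESTART;	(refault)
 *		}
 *		uobjpage = pg;	(new page, still PG_BUSY)
 *	}
 */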

int
uvm_loanbreak_anon(struct vm_anon *anon, struct uvm_object *uobj)
{
	struct vm_page *pg;

	KASSERT(mutex_owned(anon->an_lock));
	KASSERT(uobj == NULL || mutex_owned(uobj->vmobjlock));

	/* get new un-owned replacement page */
	pg = uvm_pagealloc(NULL, 0, NULL, 0);
	if (pg == NULL) {
		return ENOMEM;
	}

	/* copy old -> new */
	uvm_pagecopy(anon->an_page, pg);

	/* force reload */
	pmap_page_protect(anon->an_page, VM_PROT_NONE);
	mutex_enter(&uvm_pageqlock);	  /* KILL loan */

	anon->an_page->uanon = NULL;
	/* in case we owned */
	anon->an_page->pqflags &= ~PQ_ANON;

	if (uobj) {
		/* if we were receiver of loan */
		anon->an_page->loan_count--;
	} else {
		/*
		 * we were the lender (A->K); need to remove the page from
		 * pageq's.
		 */
		uvm_pagedequeue(anon->an_page);
	}

	if (uobj) {
		mutex_exit(uobj->vmobjlock);
	}

	/* install new page in anon */
	anon->an_page = pg;
	pg->uanon = anon;
	pg->pqflags |= PQ_ANON;

	uvm_pageactivate(pg);
	mutex_exit(&uvm_pageqlock);

	pg->flags &= ~(PG_BUSY|PG_FAKE);
	UVM_PAGE_OWN(pg, NULL);

	/* done! */

	return 0;
}
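
/*
 * the anon-side analogue: a fault handler resolving a write to a loaned
 * anon page might use uvm_loanbreak_anon() roughly like this (sketch;
 * the caller is assumed to hold anon->an_lock and, if the page also has
 * a uobj owner, that uobj's vmobjlock):
 *
 *	if (anon->an_page->loan_count != 0) {
 *		error = uvm_loanbreak_anon(anon, uobj);
 *		if (error != 0) {
 *			... unlock everything, wait for memory with
 *			    uvm_wait(), and restart the fault ...
 *		}
 *	}
 */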
1238