/*	$NetBSD: uvm_loan.c,v 1.77 2010/02/03 14:02:49 uebayasi Exp $	*/

/*
 *
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by Charles D. Cranor and
 *      Washington University.
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * from: Id: uvm_loan.c,v 1.1.6.4 1998/02/06 05:08:43 chs Exp
 */

/*
 * uvm_loan.c: page loanout handler
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_loan.c,v 1.77 2010/02/03 14:02:49 uebayasi Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/mman.h>

#include <uvm/uvm.h>

/*
 * "loaned" pages are pages which are (read-only, copy-on-write) loaned
 * from the VM system to other parts of the kernel.   this allows page
 * copying to be avoided (e.g. you can loan pages from objs/anons to
 * the mbuf system).
 *
 * there are 3 types of loans possible:
 *  O->K  uvm_object page to wired kernel page (e.g. mbuf data area)
 *  A->K  anon page to wired kernel page (e.g. mbuf data area)
 *  O->A  uvm_object to anon loan (e.g. vnode page to an anon)
 * note that it is possible to have an O page loaned to both an A and a K
 * at the same time.
 *
 * loans are tracked by pg->loan_count.  an O->A page will have both
 * a uvm_object and a vm_anon, but PQ_ANON will not be set.   this sort
 * of page is considered "owned" by the uvm_object (not the anon).
 *
 * each loan of a page to the kernel bumps the pg->wire_count.  the
 * kernel mappings for these pages will be read-only and wired.  since
 * the page will also be wired, it will not be a candidate for pageout,
 * and thus will never be pmap_page_protect()'d with VM_PROT_NONE.  a
 * write fault in the kernel to one of these pages will not cause
 * copy-on-write.  instead, the page fault is considered fatal.  this
 * is because the kernel mapping will have no way to look up the
 * object/anon which the page is owned by.  this is a good side-effect,
 * since a kernel write to a loaned page is an error.
 *
 * owners that want to free their pages and discover that they are
 * loaned out simply "disown" them (the page becomes an orphan).  these
 * pages should be freed when the last loan is dropped.   in some cases
 * an anon may "adopt" an orphaned page.
 *
 * locking: to read pg->loan_count either the owner or the page queues
 * must be locked.   to modify pg->loan_count, both the owner of the page
 * and the PQs must be locked.   pg->flags is (as always) locked by
 * the owner of the page.
 *
 * note that locking from the "loaned" side is tricky since the object
 * getting the loaned page has no reference to the page's owner and thus
 * the owner could "die" at any time.   in order to prevent the owner
 * from dying the page queues should be locked.   this forces us to sometimes
 * use "try" locking.
 *
 * loans are typically broken by the following events:
 *  1. user-level write fault to a loaned page
 *  2. pageout of clean+inactive O->A loaned page
 *  3. owner frees page (e.g. pager flush)
 *
 * note that loaning a page causes all mappings of the page to become
 * read-only (via pmap_page_protect).   this could have an unexpected
 * effect on normal "wired" pages if one is not careful (XXX).
 */
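
/*
 * Example (illustrative sketch only, hence "#if 0"): a typical ->K loan
 * as seen by a caller.  It hands uvm_loan() a page-aligned range of a
 * map and gets back read-only loaned pages; uvm_unloan() drops the
 * loans (orphaned pages are freed on the last unloan).  The function
 * name, the "uva"/"npages" parameters and the fixed-size array are
 * hypothetical.
 */
#if 0
static int
example_loan_to_kernel(struct vm_map *map, vaddr_t uva, int npages)
{
	struct vm_page *pgs[16];
	int error;

	KASSERT(npages <= 16);
	KASSERT((uva & PAGE_MASK) == 0);

	/* loan the pages; on success they come back read-only */
	error = uvm_loan(map, uva, (vsize_t)npages << PAGE_SHIFT, pgs,
	    UVM_LOAN_TOPAGE);
	if (error)
		return error;

	/* ... read the page contents here; never write a loaned page ... */

	/* drop the loans */
	uvm_unloan(pgs, npages, UVM_LOAN_TOPAGE);
	return 0;
}
#endif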

/*
 * local prototypes
 */

static int	uvm_loananon(struct uvm_faultinfo *, void ***,
			     int, struct vm_anon *);
static int	uvm_loanuobj(struct uvm_faultinfo *, void ***,
			     int, vaddr_t);
static int	uvm_loanzero(struct uvm_faultinfo *, void ***, int);
static void	uvm_unloananon(struct vm_anon **, int);
static void	uvm_unloanpage(struct vm_page **, int);
static int	uvm_loanpage(struct vm_page **, int);


/*
 * inlines
 */

/*
 * uvm_loanentry: loan out pages in a map entry (helper fn for uvm_loan())
 *
 * => "ufi" is the result of a successful map lookup (meaning that
 *	on entry the map is locked by the caller)
 * => we may unlock and then relock the map if needed (for I/O)
 * => we put our output result in "output"
 * => we always return with the map unlocked
 * => possible return values:
 *	-1 == error, map is unlocked
 *	 0 == map relock error (try again!), map is unlocked
 *	>0 == number of pages we loaned, map is unlocked
 *
 * NOTE: We can live with this being an inline, because it is only called
 * from one place.
 */

static inline int
uvm_loanentry(struct uvm_faultinfo *ufi, void ***output, int flags)
{
	vaddr_t curaddr = ufi->orig_rvaddr;
	vsize_t togo = ufi->size;
	struct vm_aref *aref = &ufi->entry->aref;
	struct uvm_object *uobj = ufi->entry->object.uvm_obj;
	struct vm_anon *anon;
	int rv, result = 0;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	/*
	 * lock us the rest of the way down (we unlock before return)
	 */
	if (aref->ar_amap)
		amap_lock(aref->ar_amap);

	/*
	 * loop until done
	 */
	while (togo) {

		/*
		 * find the page we want.   check the anon layer first.
		 */

		if (aref->ar_amap) {
			anon = amap_lookup(aref, curaddr - ufi->entry->start);
		} else {
			anon = NULL;
		}

		/* locked: map, amap, uobj */
		if (anon) {
			rv = uvm_loananon(ufi, output, flags, anon);
		} else if (uobj) {
			rv = uvm_loanuobj(ufi, output, flags, curaddr);
		} else if (UVM_ET_ISCOPYONWRITE(ufi->entry)) {
			rv = uvm_loanzero(ufi, output, flags);
		} else {
			uvmfault_unlockall(ufi, aref->ar_amap, uobj, NULL);
			rv = -1;
		}
		/* locked: if (rv > 0) => map, amap, uobj  [o.w. unlocked] */
		KASSERT(rv > 0 || aref->ar_amap == NULL ||
		    !mutex_owned(&aref->ar_amap->am_l));
		KASSERT(rv > 0 || uobj == NULL ||
		    !mutex_owned(&uobj->vmobjlock));

		/* total failure */
		if (rv < 0) {
			UVMHIST_LOG(loanhist, "failure %d", rv, 0,0,0);
			return (-1);
		}

		/* relock failed, need to do another lookup */
		if (rv == 0) {
			UVMHIST_LOG(loanhist, "relock failure %d", result,
			    0,0,0);
			return (result);
		}

		/*
		 * got it... advance to next page
		 */

		result++;
		togo -= PAGE_SIZE;
		curaddr += PAGE_SIZE;
	}

	/*
	 * unlock what we locked, unlock the maps and return
	 */

	if (aref->ar_amap)
		amap_unlock(aref->ar_amap);
	uvmfault_unlockmaps(ufi, false);
	UVMHIST_LOG(loanhist, "done %d", result, 0,0,0);
	return (result);
}

/*
 * normal functions
 */

/*
 * uvm_loan: loan pages in a map out to anons or to the kernel
 *
 * => map should be unlocked
 * => start and len should be multiples of PAGE_SIZE
 * => result is either an array of anons or vm_pages (depending on flags)
 * => flag values: UVM_LOAN_TOANON - loan to anons
 *                 UVM_LOAN_TOPAGE - loan to wired kernel page
 *    one and only one of these flags must be set!
 * => returns 0 (success), or an appropriate error number
 */

int
uvm_loan(struct vm_map *map, vaddr_t start, vsize_t len, void *v, int flags)
{
	struct uvm_faultinfo ufi;
	void **result, **output;
	int rv, error;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	/*
	 * ensure that one and only one of the flags is set
	 */

	KASSERT(((flags & UVM_LOAN_TOANON) == 0) ^
		((flags & UVM_LOAN_TOPAGE) == 0));
	KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);

	/*
	 * "output" is a pointer to the current place to put the loaned page.
	 */

	result = v;
	output = &result[0];	/* start at the beginning ... */

	/*
	 * while we've got pages to do
	 */

	while (len > 0) {

		/*
		 * fill in params for a call to uvmfault_lookup
		 */

		ufi.orig_map = map;
		ufi.orig_rvaddr = start;
		ufi.orig_size = len;

		/*
		 * do the lookup, the only time this will fail is if we hit on
		 * an unmapped region (an error)
		 */

		if (!uvmfault_lookup(&ufi, false)) {
			error = ENOENT;
			goto fail;
		}

		/*
		 * map now locked.  now do the loanout...
		 */

		rv = uvm_loanentry(&ufi, &output, flags);
		if (rv < 0) {
			/* all unlocked due to error */
			error = EINVAL;
			goto fail;
		}

		/*
		 * done!  the map is unlocked.  advance, if possible.
		 *
		 * XXXCDC: could be recoded to hold the map lock with
		 *	   smarter code (but it only happens on map entry
		 *	   boundaries, so it isn't that bad).
		 */

		if (rv) {
			rv <<= PAGE_SHIFT;
			len -= rv;
			start += rv;
		}
	}
	UVMHIST_LOG(loanhist, "success", 0,0,0,0);
	return 0;

fail:
	/*
	 * failed to complete loans.  drop any loans and return failure code.
	 * map is already unlocked.
	 */

	if (output - result) {
		if (flags & UVM_LOAN_TOANON) {
			uvm_unloananon((struct vm_anon **)result,
			    output - result);
		} else {
			uvm_unloanpage((struct vm_page **)result,
			    output - result);
		}
	}
	UVMHIST_LOG(loanhist, "error %d", error,0,0,0);
	return (error);
}
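
/*
 * Example (illustrative sketch only): the ->A flavour of the call
 * above, collecting copy-on-write anon references instead of pages.
 * UVM_LOAN_TOANON in the matching uvm_unloan() call drops the
 * references.  The names here are hypothetical.
 */
#if 0
static int
example_loan_to_anons(struct vm_map *map, vaddr_t uva, int npages)
{
	struct vm_anon *anons[16];
	int error;

	KASSERT(npages <= 16);
	error = uvm_loan(map, uva, (vsize_t)npages << PAGE_SHIFT, anons,
	    UVM_LOAN_TOANON);
	if (error)
		return error;

	/* ... each anons[i] now holds a reference the caller owns ... */

	uvm_unloan(anons, npages, UVM_LOAN_TOANON);
	return 0;
}
#endif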

/*
 * uvm_loananon: loan a page from an anon out
 *
 * => called with map, amap, uobj locked
 * => return value:
 *	-1 = fatal error, everything is unlocked, abort.
 *	 0 = lookup in ufi went stale, everything unlocked, relookup and
 *		try again
 *	 1 = got it, everything still locked
 */

static int
uvm_loananon(struct uvm_faultinfo *ufi, void ***output, int flags,
    struct vm_anon *anon)
{
	struct vm_page *pg;
	int error;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	/*
	 * if we are loaning to "another" anon then it is easy, we just
	 * bump the reference count on the current anon and return a
	 * pointer to it (it becomes copy-on-write shared).
	 */

	if (flags & UVM_LOAN_TOANON) {
		mutex_enter(&anon->an_lock);
		pg = anon->an_page;
		if (pg && (pg->pqflags & PQ_ANON) != 0 && anon->an_ref == 1) {
			if (pg->wire_count > 0) {
				UVMHIST_LOG(loanhist, "->A wired %p", pg,0,0,0);
				uvmfault_unlockall(ufi,
				    ufi->entry->aref.ar_amap,
				    ufi->entry->object.uvm_obj, anon);
				return (-1);
			}
			pmap_page_protect(pg, VM_PROT_READ);
		}
		anon->an_ref++;
		**output = anon;
		(*output)++;
		mutex_exit(&anon->an_lock);
		UVMHIST_LOG(loanhist, "->A done", 0,0,0,0);
		return (1);
	}

	/*
	 * we are loaning to a kernel-page.   we need to get the page
	 * resident so we can wire it.   uvmfault_anonget will handle
	 * this for us.
	 */

	mutex_enter(&anon->an_lock);
	error = uvmfault_anonget(ufi, ufi->entry->aref.ar_amap, anon);

	/*
	 * if we were unable to get the anon, then uvmfault_anonget has
	 * unlocked everything and returned an error code.
	 */

	if (error) {
		UVMHIST_LOG(loanhist, "error %d", error,0,0,0);

		/* need to refault (i.e. refresh our lookup) ? */
		if (error == ERESTART) {
			return (0);
		}

		/* "try again"?   sleep a bit and retry ... */
		if (error == EAGAIN) {
			kpause("loanagain", false, hz/2, NULL);
			return (0);
		}

		/* otherwise flag it as an error */
		return (-1);
	}

	/*
	 * we have the page and its owner locked: do the loan now.
	 */

	pg = anon->an_page;
	mutex_enter(&uvm_pageqlock);
	if (pg->wire_count > 0) {
		mutex_exit(&uvm_pageqlock);
		UVMHIST_LOG(loanhist, "->K wired %p", pg,0,0,0);
		KASSERT(pg->uobject == NULL);
		uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap,
		    NULL, anon);
		return (-1);
	}
	if (pg->loan_count == 0) {
		pmap_page_protect(pg, VM_PROT_READ);
	}
	pg->loan_count++;
	uvm_pageactivate(pg);
	mutex_exit(&uvm_pageqlock);
	**output = pg;
	(*output)++;

	/* unlock anon and return success */
	if (pg->uobject)
		mutex_exit(&pg->uobject->vmobjlock);
	mutex_exit(&anon->an_lock);
	UVMHIST_LOG(loanhist, "->K done", 0,0,0,0);
	return (1);
}

/*
 * uvm_loanpage: loan out pages to kernel (->K)
 *
 * => pages should be object-owned and the object should be locked.
 * => in the case of error, the object might be unlocked and relocked.
 * => caller should busy the pages beforehand.
 * => pages will be unbusied.
 * => fail with EBUSY if we meet a wired page.
 */
static int
uvm_loanpage(struct vm_page **pgpp, int npages)
{
	int i;
	int error = 0;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	for (i = 0; i < npages; i++) {
		struct vm_page *pg = pgpp[i];

		KASSERT(pg->uobject != NULL);
		KASSERT(pg->uobject == pgpp[0]->uobject);
		KASSERT(!(pg->flags & (PG_RELEASED|PG_PAGEOUT)));
		KASSERT(mutex_owned(&pg->uobject->vmobjlock));
		KASSERT(pg->flags & PG_BUSY);

		mutex_enter(&uvm_pageqlock);
		if (pg->wire_count > 0) {
			mutex_exit(&uvm_pageqlock);
			UVMHIST_LOG(loanhist, "wired %p", pg,0,0,0);
			error = EBUSY;
			break;
		}
		if (pg->loan_count == 0) {
			pmap_page_protect(pg, VM_PROT_READ);
		}
		pg->loan_count++;
		uvm_pageactivate(pg);
		mutex_exit(&uvm_pageqlock);
	}

	uvm_page_unbusy(pgpp, npages);

	if (error) {
		/*
		 * backout what we've done
		 */
		kmutex_t *slock = &pgpp[0]->uobject->vmobjlock;

		mutex_exit(slock);
		uvm_unloan(pgpp, i, UVM_LOAN_TOPAGE);
		mutex_enter(slock);
	}

	UVMHIST_LOG(loanhist, "done %d", error,0,0,0);
	return error;
}

/*
 * XXX UBC temp limit
 * number of pages to get at once.
 * should be <= MAX_READ_AHEAD in genfs_vnops.c
 */
#define	UVM_LOAN_GET_CHUNK	16

/*
 * uvm_loanuobjpages: loan pages from a uobj out (O->K)
 *
 * => uobj shouldn't be locked.  (we'll lock it)
 * => fail with EBUSY if we meet a wired page.
 */
int
uvm_loanuobjpages(struct uvm_object *uobj, voff_t pgoff, int orignpages,
    struct vm_page **origpgpp)
{
	int ndone; /* # of pages loaned out */
	struct vm_page **pgpp;
	int error;
	int i;
	kmutex_t *slock;

	pgpp = origpgpp;
	for (ndone = 0; ndone < orignpages; ) {
		int npages;
		/* npendloan: # of pages busied but not loaned out yet. */
		int npendloan = 0xdead; /* XXX gcc */
reget:
		npages = MIN(UVM_LOAN_GET_CHUNK, orignpages - ndone);
		mutex_enter(&uobj->vmobjlock);
		error = (*uobj->pgops->pgo_get)(uobj,
		    pgoff + (ndone << PAGE_SHIFT), pgpp, &npages, 0,
		    VM_PROT_READ, 0, PGO_SYNCIO);
		if (error == EAGAIN) {
			kpause("loanuopg", false, hz/2, NULL);
			continue;
		}
		if (error)
			goto fail;

		KASSERT(npages > 0);

		/* loan and unbusy pages */
		slock = NULL;
		for (i = 0; i < npages; i++) {
			kmutex_t *nextslock; /* slock for next page */
			struct vm_page *pg = *pgpp;

			/* XXX assuming that the page is owned by uobj */
			KASSERT(pg->uobject != NULL);
			nextslock = &pg->uobject->vmobjlock;

			if (slock != nextslock) {
				if (slock) {
					KASSERT(npendloan > 0);
					error = uvm_loanpage(pgpp - npendloan,
					    npendloan);
					mutex_exit(slock);
					if (error)
						goto fail;
					ndone += npendloan;
					KASSERT(origpgpp + ndone == pgpp);
				}
				slock = nextslock;
				npendloan = 0;
				mutex_enter(slock);
			}

			if ((pg->flags & PG_RELEASED) != 0) {
				/*
				 * release pages and try again.
				 */
				mutex_exit(slock);
				for (; i < npages; i++) {
					pg = pgpp[i];
					slock = &pg->uobject->vmobjlock;

					mutex_enter(slock);
					mutex_enter(&uvm_pageqlock);
					uvm_page_unbusy(&pg, 1);
					mutex_exit(&uvm_pageqlock);
					mutex_exit(slock);
				}
				goto reget;
			}

			npendloan++;
			pgpp++;
			KASSERT(origpgpp + ndone + npendloan == pgpp);
		}
		KASSERT(slock != NULL);
		KASSERT(npendloan > 0);
		error = uvm_loanpage(pgpp - npendloan, npendloan);
		mutex_exit(slock);
		if (error)
			goto fail;
		ndone += npendloan;
		KASSERT(origpgpp + ndone == pgpp);
	}

	return 0;

fail:
	uvm_unloan(origpgpp, ndone, UVM_LOAN_TOPAGE);

	return error;
}
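
/*
 * Example (illustrative sketch only): loaning resident pages of a
 * vnode's backing object out to the kernel, e.g. for zero-copy I/O.
 * The function name and the assumption that the caller already holds
 * a reference to "vp" are hypothetical.
 */
#if 0
static int
example_loan_vnode_pages(struct vnode *vp, int npages,
    struct vm_page **pgpp)
{
	int error;

	/* loan pages starting at offset 0 of the vnode's uvm_object */
	error = uvm_loanuobjpages(&vp->v_uobj, 0, npages, pgpp);
	if (error)
		return error;

	/* ... hand the read-only pages to the consumer (e.g. mbufs) ... */

	uvm_unloan(pgpp, npages, UVM_LOAN_TOPAGE);
	return 0;
}
#endif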

/*
 * uvm_loanuobj: loan a page from a uobj out
 *
 * => called with map, amap, uobj locked
 * => return value:
 *	-1 = fatal error, everything is unlocked, abort.
 *	 0 = lookup in ufi went stale, everything unlocked, relookup and
 *		try again
 *	 1 = got it, everything still locked
 */

static int
uvm_loanuobj(struct uvm_faultinfo *ufi, void ***output, int flags, vaddr_t va)
{
	struct vm_amap *amap = ufi->entry->aref.ar_amap;
	struct uvm_object *uobj = ufi->entry->object.uvm_obj;
	struct vm_page *pg;
	struct vm_anon *anon;
	int error, npages;
	bool locked;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	/*
	 * first we must make sure the page is resident.
	 *
	 * XXXCDC: duplicate code with uvm_fault().
	 */

	/* locked: maps(read), amap(if there) */
	mutex_enter(&uobj->vmobjlock);
	/* locked: maps(read), amap(if there), uobj */

	if (uobj->pgops->pgo_get) {	/* try locked pgo_get */
		npages = 1;
		pg = NULL;
		error = (*uobj->pgops->pgo_get)(uobj,
		    va - ufi->entry->start + ufi->entry->offset,
		    &pg, &npages, 0, VM_PROT_READ, MADV_NORMAL, PGO_LOCKED);
	} else {
		error = EIO;		/* must have pgo_get op */
	}

	/*
	 * check the result of the locked pgo_get.  if there is a problem,
	 * then we fail the loan.
	 */

	if (error && error != EBUSY) {
		uvmfault_unlockall(ufi, amap, uobj, NULL);
		return (-1);
	}

	/*
	 * if we need to unlock for I/O, do so now.
	 */

	if (error == EBUSY) {
		uvmfault_unlockall(ufi, amap, NULL, NULL);

		/* locked: uobj */
		npages = 1;
		error = (*uobj->pgops->pgo_get)(uobj,
		    va - ufi->entry->start + ufi->entry->offset,
		    &pg, &npages, 0, VM_PROT_READ, MADV_NORMAL, PGO_SYNCIO);
		/* locked: <nothing> */

		if (error) {
			if (error == EAGAIN) {
				kpause("fltagain2", false, hz/2, NULL);
				return (0);
			}
			return (-1);
		}

		/*
		 * pgo_get was a success.   attempt to relock everything.
		 */

		locked = uvmfault_relock(ufi);
		if (locked && amap)
			amap_lock(amap);
		uobj = pg->uobject;
		mutex_enter(&uobj->vmobjlock);

		/*
		 * verify that the page has not been released and re-verify
		 * that the amap slot is still free.   if there is a problem,
		 * we drop our lock (thus forcing a lookup refresh/retry).
		 */

		if ((pg->flags & PG_RELEASED) != 0 ||
		    (locked && amap && amap_lookup(&ufi->entry->aref,
		    ufi->orig_rvaddr - ufi->entry->start))) {
			if (locked)
				uvmfault_unlockall(ufi, amap, NULL, NULL);
			locked = false;
		}

		/*
		 * didn't get the lock?   release the page and retry.
		 */

		if (locked == false) {
			if (pg->flags & PG_WANTED) {
				wakeup(pg);
			}
			if (pg->flags & PG_RELEASED) {
				mutex_enter(&uvm_pageqlock);
				uvm_pagefree(pg);
				mutex_exit(&uvm_pageqlock);
				mutex_exit(&uobj->vmobjlock);
				return (0);
			}
			mutex_enter(&uvm_pageqlock);
			uvm_pageactivate(pg);
			mutex_exit(&uvm_pageqlock);
			pg->flags &= ~(PG_BUSY|PG_WANTED);
			UVM_PAGE_OWN(pg, NULL);
			mutex_exit(&uobj->vmobjlock);
			return (0);
		}
	}

	KASSERT(uobj == pg->uobject);

	/*
	 * at this point we have the page we want ("pg") marked PG_BUSY for us
	 * and we have all data structures locked.  do the loanout.  page can
	 * not be PG_RELEASED (we caught this above).
	 */

	if ((flags & UVM_LOAN_TOANON) == 0) {
		if (uvm_loanpage(&pg, 1)) {
			uvmfault_unlockall(ufi, amap, uobj, NULL);
			return (-1);
		}
		mutex_exit(&uobj->vmobjlock);
		**output = pg;
		(*output)++;
		return (1);
	}

	/*
	 * must be a loan to an anon.   check to see if there is already
	 * an anon associated with this page.  if so, then just bump its
	 * reference count and return it.   the page should already be
	 * mapped read-only because it is already on loan.
	 */

	if (pg->uanon) {
		anon = pg->uanon;
		mutex_enter(&anon->an_lock);
		anon->an_ref++;
		mutex_exit(&anon->an_lock);
		if (pg->flags & PG_WANTED) {
			wakeup(pg);
		}
		pg->flags &= ~(PG_WANTED|PG_BUSY);
		UVM_PAGE_OWN(pg, NULL);
		mutex_exit(&uobj->vmobjlock);
		**output = anon;
		(*output)++;
		return (1);
	}

	/*
	 * need to allocate a new anon
	 */

	anon = uvm_analloc();
	if (anon == NULL) {
		goto fail;
	}
	anon->an_page = pg;
	pg->uanon = anon;
	mutex_enter(&uvm_pageqlock);
	if (pg->wire_count > 0) {
		mutex_exit(&uvm_pageqlock);
		UVMHIST_LOG(loanhist, "wired %p", pg,0,0,0);
		pg->uanon = NULL;
		anon->an_page = NULL;
		anon->an_ref--;
		mutex_exit(&anon->an_lock);
		uvm_anfree(anon);
		goto fail;
	}
	if (pg->loan_count == 0) {
		pmap_page_protect(pg, VM_PROT_READ);
	}
	pg->loan_count++;
	uvm_pageactivate(pg);
	mutex_exit(&uvm_pageqlock);
	if (pg->flags & PG_WANTED) {
		wakeup(pg);
	}
	pg->flags &= ~(PG_WANTED|PG_BUSY);
	UVM_PAGE_OWN(pg, NULL);
	mutex_exit(&uobj->vmobjlock);
	mutex_exit(&anon->an_lock);
	**output = anon;
	(*output)++;
	return (1);

fail:
	UVMHIST_LOG(loanhist, "fail", 0,0,0,0);
	/*
	 * unlock everything and bail out.
	 */
	if (pg->flags & PG_WANTED) {
		wakeup(pg);
	}
	pg->flags &= ~(PG_WANTED|PG_BUSY);
	UVM_PAGE_OWN(pg, NULL);
	uvmfault_unlockall(ufi, amap, uobj, NULL);
	return (-1);
}

/*
 * uvm_loanzero: loan a zero-fill page out
 *
 * => called with map, amap, uobj locked
 * => return value:
 *	-1 = fatal error, everything is unlocked, abort.
 *	 0 = lookup in ufi went stale, everything unlocked, relookup and
 *		try again
 *	 1 = got it, everything still locked
 */

static struct uvm_object uvm_loanzero_object;

static int
uvm_loanzero(struct uvm_faultinfo *ufi, void ***output, int flags)
{
	struct vm_anon *anon;
	struct vm_page *pg;
	struct vm_amap *amap = ufi->entry->aref.ar_amap;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);
again:
	mutex_enter(&uvm_loanzero_object.vmobjlock);

	/*
	 * first, get ahold of our single zero page.
	 */

	if (__predict_false((pg =
			     TAILQ_FIRST(&uvm_loanzero_object.memq)) == NULL)) {
		while ((pg = uvm_pagealloc(&uvm_loanzero_object, 0, NULL,
					   UVM_PGA_ZERO)) == NULL) {
			mutex_exit(&uvm_loanzero_object.vmobjlock);
			uvmfault_unlockall(ufi, amap, NULL, NULL);
			uvm_wait("loanzero");
			if (!uvmfault_relock(ufi)) {
				return (0);
			}
			if (amap) {
				amap_lock(amap);
			}
			goto again;
		}

		/* got a zero'd page. */
		pg->flags &= ~(PG_WANTED|PG_BUSY|PG_FAKE);
		pg->flags |= PG_RDONLY;
		mutex_enter(&uvm_pageqlock);
		uvm_pageactivate(pg);
		mutex_exit(&uvm_pageqlock);
		UVM_PAGE_OWN(pg, NULL);
	}

	if ((flags & UVM_LOAN_TOANON) == 0) {	/* loaning to kernel-page */
		mutex_enter(&uvm_pageqlock);
		pg->loan_count++;
		mutex_exit(&uvm_pageqlock);
		mutex_exit(&uvm_loanzero_object.vmobjlock);
		**output = pg;
		(*output)++;
		return (1);
	}

	/*
	 * loaning to an anon.  check to see if there is already an anon
	 * associated with this page.  if so, then just bump its reference
	 * count and return it.
	 */

	if (pg->uanon) {
		anon = pg->uanon;
		mutex_enter(&anon->an_lock);
		anon->an_ref++;
		mutex_exit(&anon->an_lock);
		mutex_exit(&uvm_loanzero_object.vmobjlock);
		**output = anon;
		(*output)++;
		return (1);
	}

	/*
	 * need to allocate a new anon
	 */

	anon = uvm_analloc();
	if (anon == NULL) {
		/* out of swap causes us to fail */
		mutex_exit(&uvm_loanzero_object.vmobjlock);
		uvmfault_unlockall(ufi, amap, NULL, NULL);
		return (-1);
	}
	anon->an_page = pg;
	pg->uanon = anon;
	mutex_enter(&uvm_pageqlock);
	pg->loan_count++;
	uvm_pageactivate(pg);
	mutex_exit(&uvm_pageqlock);
	mutex_exit(&anon->an_lock);
	mutex_exit(&uvm_loanzero_object.vmobjlock);
	**output = anon;
	(*output)++;
	return (1);
}


/*
 * uvm_unloananon: kill loans on anons (basically a normal ref drop)
 *
 * => we expect all our resources to be unlocked
 */

static void
uvm_unloananon(struct vm_anon **aloans, int nanons)
{
	struct vm_anon *anon;

	while (nanons-- > 0) {
		int refs;

		anon = *aloans++;
		mutex_enter(&anon->an_lock);
		refs = --anon->an_ref;
		mutex_exit(&anon->an_lock);

		if (refs == 0) {
			uvm_anfree(anon);
		}
	}
}

/*
 * uvm_unloanpage: kill loans on pages loaned out to the kernel
 *
 * => we expect all our resources to be unlocked
 */

static void
uvm_unloanpage(struct vm_page **ploans, int npages)
{
	struct vm_page *pg;
	kmutex_t *slock;

	mutex_enter(&uvm_pageqlock);
	while (npages-- > 0) {
		pg = *ploans++;

		/*
		 * do a little dance to acquire the object or anon lock
		 * as appropriate.  we are locking in the wrong order,
		 * so we have to do a try-lock here.
		 */

		slock = NULL;
		while (pg->uobject != NULL || pg->uanon != NULL) {
			if (pg->uobject != NULL) {
				slock = &pg->uobject->vmobjlock;
			} else {
				slock = &pg->uanon->an_lock;
			}
			if (mutex_tryenter(slock)) {
				break;
			}
			mutex_exit(&uvm_pageqlock);
			/* XXX Better than yielding but inadequate. */
			kpause("livelock", false, 1, NULL);
			mutex_enter(&uvm_pageqlock);
			slock = NULL;
		}

		/*
		 * drop our loan.  if the page is owned by an anon but
		 * PQ_ANON is not set, the page was loaned to the anon
		 * from an object which dropped ownership, so resolve
		 * this by turning the anon's loan into real ownership
		 * (ie. decrement loan_count again and set PQ_ANON).
		 * after all this, if there are no loans left, put the
		 * page back on a paging queue (if the page is owned by
		 * an anon) or free it (if the page is now unowned).
		 */

		KASSERT(pg->loan_count > 0);
		pg->loan_count--;
		if (pg->uobject == NULL && pg->uanon != NULL &&
		    (pg->pqflags & PQ_ANON) == 0) {
			KASSERT(pg->loan_count > 0);
			pg->loan_count--;
			pg->pqflags |= PQ_ANON;
		}
		if (pg->loan_count == 0 && pg->uobject == NULL &&
		    pg->uanon == NULL) {
			KASSERT((pg->flags & PG_BUSY) == 0);
			uvm_pagefree(pg);
		}
		if (slock != NULL) {
			mutex_exit(slock);
		}
	}
	mutex_exit(&uvm_pageqlock);
}

/*
 * uvm_unloan: kill loans on pages or anons.
 */

void
uvm_unloan(void *v, int npages, int flags)
{
	if (flags & UVM_LOAN_TOANON) {
		uvm_unloananon(v, npages);
	} else {
		uvm_unloanpage(v, npages);
	}
}

/*
 * Minimal pager for uvm_loanzero_object.  We need to provide a "put"
 * method, because the page can end up on a paging queue, and the
 * page daemon will want to call pgo_put when it encounters the page
 * on the inactive list.
 */

static int
ulz_put(struct uvm_object *uobj, voff_t start, voff_t stop, int flags)
{
	struct vm_page *pg;

	KDASSERT(uobj == &uvm_loanzero_object);

	/*
	 * Don't need to do any work here if we're not freeing pages.
	 */

	if ((flags & PGO_FREE) == 0) {
		mutex_exit(&uobj->vmobjlock);
		return 0;
	}

	/*
	 * we don't actually want to ever free the uvm_loanzero_page, so
	 * just reactivate or dequeue it.
	 */

	pg = TAILQ_FIRST(&uobj->memq);
	KASSERT(pg != NULL);
	KASSERT(TAILQ_NEXT(pg, listq.queue) == NULL);

	mutex_enter(&uvm_pageqlock);
	if (pg->uanon)
		uvm_pageactivate(pg);
	else
		uvm_pagedequeue(pg);
	mutex_exit(&uvm_pageqlock);

	mutex_exit(&uobj->vmobjlock);
	return 0;
}

static const struct uvm_pagerops ulz_pager = {
	.pgo_put = ulz_put,
};

/*
 * uvm_loan_init(): initialize the uvm_loan() facility.
 */

void
uvm_loan_init(void)
{

	UVM_OBJ_INIT(&uvm_loanzero_object, &ulz_pager, 0);

	UVMHIST_INIT(loanhist, 300);
}

/*
 * uvm_loanbreak: break loan on a uobj page
 *
 * => called with uobj locked
 * => the page should be busy
 * => return value:
 *	the newly allocated page if it succeeded, NULL if the allocation failed
 */
struct vm_page *
uvm_loanbreak(struct vm_page *uobjpage)
{
	struct vm_page *pg;
#ifdef DIAGNOSTIC
	struct uvm_object *uobj = uobjpage->uobject;
#endif

	KASSERT(uobj != NULL);
	KASSERT(mutex_owned(&uobj->vmobjlock));
	KASSERT(uobjpage->flags & PG_BUSY);

	/* alloc new un-owned page */
	pg = uvm_pagealloc(NULL, 0, NULL, 0);
	if (pg == NULL)
		return NULL;

	/*
	 * copy the data from the old page to the new
	 * one and clear the fake flags on the new page (keep it busy).
	 * force a reload of the old page by clearing it from all
	 * pmaps.
	 * transfer dirtiness of the old page to the new page.
	 * then lock the page queues to rename the pages.
	 */

	uvm_pagecopy(uobjpage, pg);	/* old -> new */
	pg->flags &= ~PG_FAKE;
	pmap_page_protect(uobjpage, VM_PROT_NONE);
	if ((uobjpage->flags & PG_CLEAN) != 0 && !pmap_clear_modify(uobjpage)) {
		pmap_clear_modify(pg);
		pg->flags |= PG_CLEAN;
	} else {
		/* uvm_pagecopy marked it dirty */
		KASSERT((pg->flags & PG_CLEAN) == 0);
		/* an object with a dirty page should be dirty. */
		KASSERT(!UVM_OBJ_IS_CLEAN(uobj));
	}
	if (uobjpage->flags & PG_WANTED)
		wakeup(uobjpage);
	/* uobj still locked */
	uobjpage->flags &= ~(PG_WANTED|PG_BUSY);
	UVM_PAGE_OWN(uobjpage, NULL);

	mutex_enter(&uvm_pageqlock);

	/*
	 * replace uobjpage with new page.
	 */

	uvm_pagereplace(uobjpage, pg);

	/*
	 * if the page is no longer referenced by
	 * an anon (i.e. we are breaking an O->K
	 * loan), then remove it from any pageq's.
	 */
	if (uobjpage->uanon == NULL)
		uvm_pagedequeue(uobjpage);

	/*
	 * at this point we have absolutely no
	 * control over uobjpage
	 */

	/* install new page */
	uvm_pageactivate(pg);
	mutex_exit(&uvm_pageqlock);

	/*
	 * done!  loan is broken and "pg" is
	 * PG_BUSY.   it can now replace uobjpage.
	 */

	return pg;
}
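
/*
 * Example (illustrative sketch only): how a write-fault path might use
 * uvm_loanbreak() before granting a writable mapping.  The surrounding
 * fault-handler variables (ufi, amap) and the retry policy are
 * hypothetical; compare the real code in uvm_fault().
 */
#if 0
	/* uobj locked, uobjpage busy, write access wanted */
	if (uobjpage->loan_count) {
		struct vm_page *pg;

		pg = uvm_loanbreak(uobjpage);
		if (pg == NULL) {
			/* out of memory: back out, wait, then refault */
			uvmfault_unlockall(&ufi, amap, uobj, NULL);
			uvm_wait("flt_noram");
			return ERESTART;
		}
		uobjpage = pg;	/* continue the fault with the private copy */
	}
#endif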

int
uvm_loanbreak_anon(struct vm_anon *anon, struct uvm_object *uobj)
{
	struct vm_page *pg;

	KASSERT(mutex_owned(&anon->an_lock));
	KASSERT(uobj == NULL || mutex_owned(&uobj->vmobjlock));

	/* get new un-owned replacement page */
	pg = uvm_pagealloc(NULL, 0, NULL, 0);
	if (pg == NULL) {
		return ENOMEM;
	}

	/*
	 * copy data, kill loan, and drop uobj lock (if any)
	 */
	/* copy old -> new */
	uvm_pagecopy(anon->an_page, pg);

	/* force reload */
	pmap_page_protect(anon->an_page, VM_PROT_NONE);
	mutex_enter(&uvm_pageqlock);	  /* KILL loan */

	anon->an_page->uanon = NULL;
	/* in case we owned */
	anon->an_page->pqflags &= ~PQ_ANON;

	if (uobj) {
		/* if we were receiver of loan */
		anon->an_page->loan_count--;
	} else {
		/*
		 * we were the lender (A->K); need to remove the page from
		 * pageq's.
		 */
		uvm_pagedequeue(anon->an_page);
	}

	if (uobj) {
		mutex_exit(&uobj->vmobjlock);
	}

	/* install new page in anon */
	anon->an_page = pg;
	pg->uanon = anon;
	pg->pqflags |= PQ_ANON;

	uvm_pageactivate(pg);
	mutex_exit(&uvm_pageqlock);

	pg->flags &= ~(PG_BUSY|PG_FAKE);
	UVM_PAGE_OWN(pg, NULL);

	/* done! */

	return 0;
}
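
/*
 * Example (illustrative sketch only): breaking an anon's loan on a
 * write fault, mirroring the uobj case above.  The locking context in
 * the comments follows this function's stated requirements; the
 * fault-handler variables and retry policy are hypothetical.
 */
#if 0
	/* anon locked; uobj (the page's owner, if any) locked too */
	if (anon->an_page->loan_count) {
		error = uvm_loanbreak_anon(anon, uobj);
		if (error) {
			/* ENOMEM: unlock, wait for memory, refault */
			uvmfault_unlockall(&ufi, amap, uobj, anon);
			uvm_wait("flt_noram2");
			return ERESTART;
		}
		/* anon->an_page is now a private, non-loaned page */
	}
#endif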
1247