/*	$NetBSD: uvm_loan.c,v 1.56 2005/12/11 12:25:29 christos Exp $	*/

/*
 *
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by Charles D. Cranor and
 *      Washington University.
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * from: Id: uvm_loan.c,v 1.1.6.4 1998/02/06 05:08:43 chs Exp
 */

/*
 * uvm_loan.c: page loanout handler
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_loan.c,v 1.56 2005/12/11 12:25:29 christos Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/mman.h>

#include <uvm/uvm.h>

/*
 * "loaned" pages are pages which are (read-only, copy-on-write) loaned
 * from the VM system to other parts of the kernel.   this allows page
 * copying to be avoided (e.g. you can loan pages from objs/anons to
 * the mbuf system).
 *
 * there are 3 types of loans possible:
 *  O->K  uvm_object page to wired kernel page (e.g. mbuf data area)
 *  A->K  anon page to wired kernel page (e.g. mbuf data area)
 *  O->A  uvm_object to anon loan (e.g. vnode page to an anon)
 * note that it is possible to have an O page loaned to both an A and K
 * at the same time.
 *
 * loans are tracked by pg->loan_count.  an O->A page will have both
 * a uvm_object and a vm_anon, but PQ_ANON will not be set.   this sort
 * of page is considered "owned" by the uvm_object (not the anon).
 *
 * each loan of a page to the kernel bumps the pg->wire_count.  the
 * kernel mappings for these pages will be read-only and wired.  since
 * the page will also be wired, it will not be a candidate for pageout,
 * and thus will never be pmap_page_protect()'d with VM_PROT_NONE.  a
 * write fault in the kernel to one of these pages will not cause
 * copy-on-write.  instead, the page fault is considered fatal.  this
 * is because the kernel mapping will have no way to look up the
 * object/anon which the page is owned by.  this is a good side-effect,
 * since a kernel write to a loaned page is an error.
 *
 * owners that want to free their pages and discover that they are
 * loaned out simply "disown" them (the page becomes an orphan).  these
 * pages should be freed when the last loan is dropped.   in some cases
 * an anon may "adopt" an orphaned page.
 *
 * locking: to read pg->loan_count either the owner or the page queues
 * must be locked.   to modify pg->loan_count, both the owner of the page
 * and the PQs must be locked.   pg->flags is (as always) locked by
 * the owner of the page.
 *
 * note that locking from the "loaned" side is tricky since the object
 * getting the loaned page has no reference to the page's owner and thus
 * the owner could "die" at any time.   in order to prevent the owner
 * from dying the page queues should be locked.   this forces us to sometimes
 * use "try" locking.
 *
 * loans are typically broken by the following events:
 *  1. user-level write fault to a loaned page
 *  2. pageout of clean+inactive O->A loaned page
 *  3. owner frees page (e.g. pager flush)
 *
 * note that loaning a page causes all mappings of the page to become
 * read-only (via pmap_page_protect).   this could have an unexpected
 * effect on normal "wired" pages if one is not careful (XXX).
 */
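
/*
 * Illustrative sketch (hypothetical, kept under #if 0 so it is never
 * compiled): how a kernel subsystem might borrow a user page via the
 * loan mechanism described above and later drop the loan.  The
 * function name, map and address here are made up for illustration;
 * a real caller (e.g. a zero-copy socket send path) would get them
 * from the current process.
 */
#if 0
static int
example_loan_to_kernel(struct vm_map *map, vaddr_t va)
{
	struct vm_page *pgs[1];
	int error;

	/* loan one page to the kernel (O->K or A->K, read-only) */
	error = uvm_loan(map, trunc_page(va), PAGE_SIZE,
	    pgs, UVM_LOAN_TOPAGE);
	if (error)
		return (error);

	/* ... map pgs[0] read-only and consume the data ... */

	/* drop the loan when done with the page */
	uvm_unloan(pgs, 1, UVM_LOAN_TOPAGE);
	return (0);
}
#endif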

/*
 * local prototypes
 */

static int	uvm_loananon(struct uvm_faultinfo *, void ***,
			     int, struct vm_anon *);
static int	uvm_loanuobj(struct uvm_faultinfo *, void ***,
			     int, vaddr_t);
static int	uvm_loanzero(struct uvm_faultinfo *, void ***, int);
static void	uvm_unloananon(struct vm_anon **, int);
static void	uvm_unloanpage(struct vm_page **, int);
static int	uvm_loanpage(struct vm_page **, int);


/*
 * inlines
 */

/*
 * uvm_loanentry: loan out pages in a map entry (helper fn for uvm_loan())
 *
 * => "ufi" is the result of a successful map lookup (meaning that
 *	on entry the map is locked by the caller)
 * => we may unlock and then relock the map if needed (for I/O)
 * => we put our output result in "output"
 * => we always return with the map unlocked
 * => possible return values:
 *	-1 == error, map is unlocked
 *	 0 == map relock error (try again!), map is unlocked
 *	>0 == number of pages we loaned, map is unlocked
 *
 * NOTE: We can live with this being an inline, because it is only called
 * from one place.
 */

static __inline int
uvm_loanentry(struct uvm_faultinfo *ufi, void ***output, int flags)
{
	vaddr_t curaddr = ufi->orig_rvaddr;
	vsize_t togo = ufi->size;
	struct vm_aref *aref = &ufi->entry->aref;
	struct uvm_object *uobj = ufi->entry->object.uvm_obj;
	struct vm_anon *anon;
	int rv, result = 0;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	/*
	 * lock us the rest of the way down (we unlock before return)
	 */
	if (aref->ar_amap)
		amap_lock(aref->ar_amap);
	if (uobj)
		simple_lock(&uobj->vmobjlock);

	/*
	 * loop until done
	 */
	while (togo) {

		/*
		 * find the page we want.   check the anon layer first.
		 */

		if (aref->ar_amap) {
			anon = amap_lookup(aref, curaddr - ufi->entry->start);
		} else {
			anon = NULL;
		}

		/* locked: map, amap, uobj */
		if (anon) {
			rv = uvm_loananon(ufi, output, flags, anon);
		} else if (uobj) {
			rv = uvm_loanuobj(ufi, output, flags, curaddr);
		} else if (UVM_ET_ISCOPYONWRITE(ufi->entry)) {
			rv = uvm_loanzero(ufi, output, flags);
		} else {
			uvmfault_unlockall(ufi, aref->ar_amap, uobj, NULL);
			rv = -1;
		}
		/* locked: if (rv > 0) => map, amap, uobj  [o.w. unlocked] */
		LOCK_ASSERT(rv > 0 || aref->ar_amap == NULL ||
		    !simple_lock_held(&aref->ar_amap->am_l));
		LOCK_ASSERT(rv > 0 || uobj == NULL ||
		    !simple_lock_held(&uobj->vmobjlock));

		/* total failure */
		if (rv < 0) {
			UVMHIST_LOG(loanhist, "failure %d", rv, 0,0,0);
			return (-1);
		}

		/* relock failed, need to do another lookup */
		if (rv == 0) {
			UVMHIST_LOG(loanhist, "relock failure %d", result,
			    0,0,0);
			return (result);
		}

		/*
		 * got it... advance to next page
		 */

		result++;
		togo -= PAGE_SIZE;
		curaddr += PAGE_SIZE;
	}

	/*
	 * unlock what we locked, unlock the maps and return
	 */

	if (aref->ar_amap)
		amap_unlock(aref->ar_amap);
	if (uobj)
		simple_unlock(&uobj->vmobjlock);
	uvmfault_unlockmaps(ufi, FALSE);
	UVMHIST_LOG(loanhist, "done %d", result, 0,0,0);
	return (result);
}

/*
 * normal functions
 */

/*
 * uvm_loan: loan pages in a map out to anons or to the kernel
 *
 * => map should be unlocked
 * => start and len should be multiples of PAGE_SIZE
 * => result is either an array of anons or vm_pages (depending on flags)
 * => flag values: UVM_LOAN_TOANON - loan to anons
 *                 UVM_LOAN_TOPAGE - loan to wired kernel page
 *    one and only one of these flags must be set!
 * => returns 0 (success), or an appropriate error number
 */

int
uvm_loan(struct vm_map *map, vaddr_t start, vsize_t len, void *v, int flags)
{
	struct uvm_faultinfo ufi;
	void **result, **output;
	int rv, error;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	/*
	 * ensure that one and only one of the flags is set
	 */

	KASSERT(((flags & UVM_LOAN_TOANON) == 0) ^
		((flags & UVM_LOAN_TOPAGE) == 0));
	KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);

	/*
	 * "output" is a pointer to the current place to put the loaned page.
	 */

	result = v;
	output = &result[0];	/* start at the beginning ... */

	/*
	 * while we've got pages to do
	 */

	while (len > 0) {

		/*
		 * fill in params for a call to uvmfault_lookup
		 */

		ufi.orig_map = map;
		ufi.orig_rvaddr = start;
		ufi.orig_size = len;

		/*
		 * do the lookup, the only time this will fail is if we hit on
		 * an unmapped region (an error)
		 */

		if (!uvmfault_lookup(&ufi, FALSE)) {
			error = ENOENT;
			goto fail;
		}

		/*
		 * map is now locked.  now do the loanout...
		 */

		rv = uvm_loanentry(&ufi, &output, flags);
		if (rv < 0) {
			/* all unlocked due to error */
			error = EINVAL;
			goto fail;
		}

		/*
		 * done!  the map is unlocked.  advance, if possible.
		 *
		 * XXXCDC: could be recoded to hold the map lock with
		 *	   smarter code (but it only happens on map entry
		 *	   boundaries, so it isn't that bad).
		 */

		if (rv) {
			rv <<= PAGE_SHIFT;
			len -= rv;
			start += rv;
		}
	}
	UVMHIST_LOG(loanhist, "success", 0,0,0,0);
	return 0;

fail:
	/*
	 * failed to complete loans.  drop any loans and return failure code.
	 * map is already unlocked.
	 */

	if (output - result) {
		if (flags & UVM_LOAN_TOANON) {
			uvm_unloananon((struct vm_anon **)result,
			    output - result);
		} else {
			uvm_unloanpage((struct vm_page **)result,
			    output - result);
		}
	}
	UVMHIST_LOG(loanhist, "error %d", error,0,0,0);
	return (error);
}
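
/*
 * Illustrative sketch (hypothetical, not compiled): the UVM_LOAN_TOANON
 * flavor of the same call.  Here the result array receives anon
 * references instead of pages; "anons" must have room for one pointer
 * per page in the range.  All names below are made up for illustration.
 */
#if 0
static int
example_loan_to_anons(struct vm_map *map, vaddr_t start, vsize_t len,
    struct vm_anon **anons)
{
	int error;

	error = uvm_loan(map, start, len, anons, UVM_LOAN_TOANON);
	if (error)
		return (error);

	/* ... insert the anons into some other amap ... */

	/* dropping the loan is just a reference drop on each anon */
	uvm_unloan(anons, len >> PAGE_SHIFT, UVM_LOAN_TOANON);
	return (0);
}
#endif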

/*
 * uvm_loananon: loan a page from an anon out
 *
 * => called with map, amap, uobj locked
 * => return value:
 *	-1 = fatal error, everything is unlocked, abort.
 *	 0 = lookup in ufi went stale, everything unlocked, relookup and
 *		try again
 *	 1 = got it, everything still locked
 */

int
uvm_loananon(struct uvm_faultinfo *ufi, void ***output, int flags,
    struct vm_anon *anon)
{
	struct vm_page *pg;
	int error;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	/*
	 * if we are loaning to "another" anon then it is easy, we just
	 * bump the reference count on the current anon and return a
	 * pointer to it (it becomes copy-on-write shared).
	 */

	if (flags & UVM_LOAN_TOANON) {
		simple_lock(&anon->an_lock);
		pg = anon->an_page;
		if (pg && (pg->pqflags & PQ_ANON) != 0 && anon->an_ref == 1) {
			if (pg->wire_count > 0) {
				UVMHIST_LOG(loanhist, "->A wired %p", pg,0,0,0);
				uvmfault_unlockall(ufi,
				    ufi->entry->aref.ar_amap,
				    ufi->entry->object.uvm_obj, anon);
				return (-1);
			}
			pmap_page_protect(pg, VM_PROT_READ);
		}
		anon->an_ref++;
		**output = anon;
		(*output)++;
		simple_unlock(&anon->an_lock);
		UVMHIST_LOG(loanhist, "->A done", 0,0,0,0);
		return (1);
	}

	/*
	 * we are loaning to a kernel page.   we need to get the page
	 * resident so we can wire it.   uvmfault_anonget will handle
	 * this for us.
	 */

	simple_lock(&anon->an_lock);
	error = uvmfault_anonget(ufi, ufi->entry->aref.ar_amap, anon);

	/*
	 * if we were unable to get the anon, then uvmfault_anonget has
	 * unlocked everything and returned an error code.
	 */

	if (error) {
		UVMHIST_LOG(loanhist, "error %d", error,0,0,0);

		/* need to refault (i.e. refresh our lookup)? */
		if (error == ERESTART) {
			return (0);
		}

		/* "try again"?   sleep a bit and retry ... */
		if (error == EAGAIN) {
			tsleep(&lbolt, PVM, "loanagain", 0);
			return (0);
		}

		/* otherwise flag it as an error */
		return (-1);
	}

	/*
	 * we have the page and its owner locked: do the loan now.
	 */

	pg = anon->an_page;
	uvm_lock_pageq();
	if (pg->wire_count > 0) {
		uvm_unlock_pageq();
		UVMHIST_LOG(loanhist, "->K wired %p", pg,0,0,0);
		KASSERT(pg->uobject == NULL);
		uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap,
		    ufi->entry->object.uvm_obj, anon);
		return (-1);
	}
	if (pg->loan_count == 0) {
		pmap_page_protect(pg, VM_PROT_READ);
	}
	pg->loan_count++;
	uvm_pagedequeue(pg);
	uvm_unlock_pageq();
	**output = pg;
	(*output)++;

	/* unlock anon and return success */
	if (pg->uobject)	/* XXXCDC: what if this is our uobj? bad */
		simple_unlock(&pg->uobject->vmobjlock);
	simple_unlock(&anon->an_lock);
	UVMHIST_LOG(loanhist, "->K done", 0,0,0,0);
	return (1);
}

/*
 * uvm_loanpage: loan out pages to kernel (->K)
 *
 * => pages should be object-owned and the object should be locked.
 * => in the case of error, the object might be unlocked and relocked.
 * => caller should busy the pages beforehand.
 * => pages will be unbusied.
 * => fail with EBUSY if we meet a wired page.
 */
static int
uvm_loanpage(struct vm_page **pgpp, int npages)
{
	int i;
	int error = 0;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	for (i = 0; i < npages; i++) {
		struct vm_page *pg = pgpp[i];

		KASSERT(pg->uobject != NULL);
		KASSERT(pg->uobject == pgpp[0]->uobject);
		KASSERT(!(pg->flags & (PG_RELEASED|PG_PAGEOUT)));
		LOCK_ASSERT(simple_lock_held(&pg->uobject->vmobjlock));
		KASSERT(pg->flags & PG_BUSY);

		uvm_lock_pageq();
		if (pg->wire_count > 0) {
			uvm_unlock_pageq();
			UVMHIST_LOG(loanhist, "wired %p", pg,0,0,0);
			error = EBUSY;
			break;
		}
		if (pg->loan_count == 0) {
			pmap_page_protect(pg, VM_PROT_READ);
		}
		pg->loan_count++;
		uvm_pagedequeue(pg);
		uvm_unlock_pageq();
	}

	uvm_page_unbusy(pgpp, npages);

	if (error) {
		/*
		 * backout what we've done
		 */
		struct simplelock *slock = &pgpp[0]->uobject->vmobjlock;

		simple_unlock(slock);
		uvm_unloan(pgpp, i, UVM_LOAN_TOPAGE);
		simple_lock(slock);
	}

	UVMHIST_LOG(loanhist, "done %d", error,0,0,0);
	return error;
}

/*
 * XXX UBC temp limit
 * number of pages to get at once.
 * should be <= MAX_READ_AHEAD in genfs_vnops.c
 */
#define	UVM_LOAN_GET_CHUNK	16

/*
 * uvm_loanuobjpages: loan pages from a uobj out (O->K)
 *
 * => uobj shouldn't be locked.  (we'll lock it)
 * => fail with EBUSY if we meet a wired page.
 */
int
uvm_loanuobjpages(struct uvm_object *uobj, voff_t pgoff, int orignpages,
    struct vm_page **origpgpp)
{
	int ndone; /* # of pages loaned out */
	struct vm_page **pgpp;
	int error;
	int i;
	struct simplelock *slock;

	pgpp = origpgpp;
	for (ndone = 0; ndone < orignpages; ) {
		int npages;
		/* npendloan: # of pages busied but not loaned out yet. */
		int npendloan = 0xdead; /* XXX gcc */
reget:
		npages = MIN(UVM_LOAN_GET_CHUNK, orignpages - ndone);
		simple_lock(&uobj->vmobjlock);
		error = (*uobj->pgops->pgo_get)(uobj,
		    pgoff + (ndone << PAGE_SHIFT), pgpp, &npages, 0,
		    VM_PROT_READ, 0, PGO_SYNCIO);
		if (error == EAGAIN) {
			tsleep(&lbolt, PVM, "nfsread", 0);
			continue;
		}
		if (error)
			goto fail;

		KASSERT(npages > 0);

		/* loan and unbusy pages */
		slock = NULL;
		for (i = 0; i < npages; i++) {
			struct simplelock *nextslock; /* slock for next page */
			struct vm_page *pg = *pgpp;

			/* XXX assuming that the page is owned by uobj */
			KASSERT(pg->uobject != NULL);
			nextslock = &pg->uobject->vmobjlock;

			if (slock != nextslock) {
				if (slock) {
					KASSERT(npendloan > 0);
					error = uvm_loanpage(pgpp - npendloan,
					    npendloan);
					simple_unlock(slock);
					if (error)
						goto fail;
					ndone += npendloan;
					KASSERT(origpgpp + ndone == pgpp);
				}
				slock = nextslock;
				npendloan = 0;
				simple_lock(slock);
			}

			if ((pg->flags & PG_RELEASED) != 0) {
				/*
				 * release pages and try again.
				 */
				simple_unlock(slock);
				for (; i < npages; i++) {
					pg = pgpp[i];
					slock = &pg->uobject->vmobjlock;

					simple_lock(slock);
					uvm_lock_pageq();
					uvm_page_unbusy(&pg, 1);
					uvm_unlock_pageq();
					simple_unlock(slock);
				}
				goto reget;
			}

			npendloan++;
			pgpp++;
			KASSERT(origpgpp + ndone + npendloan == pgpp);
		}
		KASSERT(slock != NULL);
		KASSERT(npendloan > 0);
		error = uvm_loanpage(pgpp - npendloan, npendloan);
		simple_unlock(slock);
		if (error)
			goto fail;
		ndone += npendloan;
		KASSERT(origpgpp + ndone == pgpp);
	}

	return 0;

fail:
	uvm_unloan(origpgpp, ndone, UVM_LOAN_TOPAGE);

	return error;
}
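
/*
 * Illustrative sketch (hypothetical, not compiled): loaning pages
 * straight out of a uvm_object with uvm_loanuobjpages(), e.g. a
 * vnode's object for zero-copy file transmit.  The caller supplies
 * the page array; on failure nothing remains loaned.  Names are
 * made up for illustration.
 */
#if 0
static int
example_loan_from_uobj(struct uvm_object *uobj, voff_t off, int npages,
    struct vm_page **pgpp)
{
	int error;

	error = uvm_loanuobjpages(uobj, off, npages, pgpp);
	if (error)
		return (error);

	/* ... hand the read-only pages to e.g. the mbuf layer ... */

	uvm_unloan(pgpp, npages, UVM_LOAN_TOPAGE);
	return (0);
}
#endif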

/*
 * uvm_loanuobj: loan a page from a uobj out
 *
 * => called with map, amap, uobj locked
 * => return value:
 *	-1 = fatal error, everything is unlocked, abort.
 *	 0 = lookup in ufi went stale, everything unlocked, relookup and
 *		try again
 *	 1 = got it, everything still locked
 */

static int
uvm_loanuobj(struct uvm_faultinfo *ufi, void ***output, int flags, vaddr_t va)
{
	struct vm_amap *amap = ufi->entry->aref.ar_amap;
	struct uvm_object *uobj = ufi->entry->object.uvm_obj;
	struct vm_page *pg;
	struct vm_anon *anon;
	int error, npages;
	boolean_t locked;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	/*
	 * first we must make sure the page is resident.
	 *
	 * XXXCDC: duplicate code with uvm_fault().
	 */

	if (uobj->pgops->pgo_get) {	/* try locked pgo_get */
		npages = 1;
		pg = NULL;
		error = (*uobj->pgops->pgo_get)(uobj,
		    va - ufi->entry->start + ufi->entry->offset,
		    &pg, &npages, 0, VM_PROT_READ, MADV_NORMAL, PGO_LOCKED);
	} else {
		error = EIO;		/* must have pgo_get op */
	}

	/*
	 * check the result of the locked pgo_get.  if there is a problem,
	 * then we fail the loan.
	 */

	if (error && error != EBUSY) {
		uvmfault_unlockall(ufi, amap, uobj, NULL);
		return (-1);
	}

	/*
	 * if we need to unlock for I/O, do so now.
	 */

	if (error == EBUSY) {
		uvmfault_unlockall(ufi, amap, NULL, NULL);

		/* locked: uobj */
		npages = 1;
		error = (*uobj->pgops->pgo_get)(uobj,
		    va - ufi->entry->start + ufi->entry->offset,
		    &pg, &npages, 0, VM_PROT_READ, MADV_NORMAL, PGO_SYNCIO);
		/* locked: <nothing> */

		if (error) {
			if (error == EAGAIN) {
				tsleep(&lbolt, PVM, "fltagain2", 0);
				return (0);
			}
			return (-1);
		}

		/*
		 * pgo_get was a success.   attempt to relock everything.
		 */

		locked = uvmfault_relock(ufi);
		if (locked && amap)
			amap_lock(amap);
		simple_lock(&uobj->vmobjlock);

		/*
		 * verify that the page has not been released and re-verify
		 * that the amap slot is still free.   if there is a problem
		 * we drop our lock (thus forcing a lookup refresh/retry).
		 */

		if ((pg->flags & PG_RELEASED) != 0 ||
		    (locked && amap && amap_lookup(&ufi->entry->aref,
		    ufi->orig_rvaddr - ufi->entry->start))) {
			if (locked)
				uvmfault_unlockall(ufi, amap, NULL, NULL);
			locked = FALSE;
		}

		/*
		 * didn't get the lock?   release the page and retry.
		 */

		if (locked == FALSE) {
			if (pg->flags & PG_WANTED) {
				wakeup(pg);
			}
			if (pg->flags & PG_RELEASED) {
				uvm_lock_pageq();
				uvm_pagefree(pg);
				uvm_unlock_pageq();
				return (0);
			}
			uvm_lock_pageq();
			uvm_pageactivate(pg);
			uvm_unlock_pageq();
			pg->flags &= ~(PG_BUSY|PG_WANTED);
			UVM_PAGE_OWN(pg, NULL);
			simple_unlock(&uobj->vmobjlock);
			return (0);
		}
	}

	/*
	 * at this point we have the page we want ("pg") marked PG_BUSY for us
	 * and we have all data structures locked.  do the loanout.  the page
	 * cannot be PG_RELEASED (we caught this above).
	 */

	if ((flags & UVM_LOAN_TOANON) == 0) {
		if (uvm_loanpage(&pg, 1)) {
			uvmfault_unlockall(ufi, amap, uobj, NULL);
			return (-1);
		}
		**output = pg;
		(*output)++;
		return (1);
	}

	/*
	 * must be a loan to an anon.   check to see if there is already
	 * an anon associated with this page.  if so, then just add a
	 * reference to it and return it.   the page should already be
	 * mapped read-only because it is already on loan.
	 */

	if (pg->uanon) {
		anon = pg->uanon;
		simple_lock(&anon->an_lock);
		anon->an_ref++;
		simple_unlock(&anon->an_lock);
		if (pg->flags & PG_WANTED) {
			wakeup(pg);
		}
		pg->flags &= ~(PG_WANTED|PG_BUSY);
		UVM_PAGE_OWN(pg, NULL);
		**output = anon;
		(*output)++;
		return (1);
	}

	/*
	 * need to allocate a new anon
	 */

	anon = uvm_analloc();
	if (anon == NULL) {
		goto fail;
	}
	anon->an_page = pg;
	pg->uanon = anon;
	uvm_lock_pageq();
	if (pg->wire_count > 0) {
		uvm_unlock_pageq();
		UVMHIST_LOG(loanhist, "wired %p", pg,0,0,0);
		pg->uanon = NULL;
		anon->an_page = NULL;
		anon->an_ref--;
		simple_unlock(&anon->an_lock);
		uvm_anfree(anon);
		goto fail;
	}
	if (pg->loan_count == 0) {
		pmap_page_protect(pg, VM_PROT_READ);
	}
	pg->loan_count++;
	uvm_pageactivate(pg);
	uvm_unlock_pageq();
	if (pg->flags & PG_WANTED) {
		wakeup(pg);
	}
	pg->flags &= ~(PG_WANTED|PG_BUSY);
	UVM_PAGE_OWN(pg, NULL);
	simple_unlock(&anon->an_lock);
	**output = anon;
	(*output)++;
	return (1);

fail:
	UVMHIST_LOG(loanhist, "fail", 0,0,0,0);
	/*
	 * unlock everything and bail out.
	 */
	if (pg->flags & PG_WANTED) {
		wakeup(pg);
	}
	pg->flags &= ~(PG_WANTED|PG_BUSY);
	UVM_PAGE_OWN(pg, NULL);
	uvmfault_unlockall(ufi, amap, uobj, NULL);
	return (-1);
}

/*
 * uvm_loanzero: loan a zero-fill page out
 *
 * => called with map, amap, uobj locked
 * => return value:
 *	-1 = fatal error, everything is unlocked, abort.
 *	 0 = lookup in ufi went stale, everything unlocked, relookup and
 *		try again
 *	 1 = got it, everything still locked
 */

static struct uvm_object uvm_loanzero_object;

static int
uvm_loanzero(struct uvm_faultinfo *ufi, void ***output, int flags)
{
	struct vm_anon *anon;
	struct vm_page *pg;
	struct uvm_object *uobj = ufi->entry->object.uvm_obj;
	struct vm_amap *amap = ufi->entry->aref.ar_amap;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);
again:
	simple_lock(&uvm_loanzero_object.vmobjlock);

	/*
	 * first, get ahold of our single zero page.
	 */

	if (__predict_false((pg =
			     TAILQ_FIRST(&uvm_loanzero_object.memq)) == NULL)) {
		while ((pg = uvm_pagealloc(&uvm_loanzero_object, 0, NULL,
					   UVM_PGA_ZERO)) == NULL) {
			simple_unlock(&uvm_loanzero_object.vmobjlock);
			uvmfault_unlockall(ufi, amap, uobj, NULL);
			uvm_wait("loanzero");
			if (!uvmfault_relock(ufi)) {
				return (0);
			}
			if (amap) {
				amap_lock(amap);
			}
			if (uobj) {
				simple_lock(&uobj->vmobjlock);
			}
			goto again;
		}

		/* got a zero'd page. */
		pg->flags &= ~(PG_WANTED|PG_BUSY|PG_FAKE);
		pg->flags |= PG_RDONLY;
		uvm_lock_pageq();
		uvm_pageactivate(pg);
		uvm_unlock_pageq();
		UVM_PAGE_OWN(pg, NULL);
	}

	if ((flags & UVM_LOAN_TOANON) == 0) {	/* loaning to kernel page */
		uvm_lock_pageq();
		pg->loan_count++;
		uvm_pagedequeue(pg);
		uvm_unlock_pageq();
		simple_unlock(&uvm_loanzero_object.vmobjlock);
		**output = pg;
		(*output)++;
		return (1);
	}

	/*
	 * loaning to an anon.  check to see if there is already an anon
	 * associated with this page.  if so, then just add a reference
	 * to it and return it.
	 */

	if (pg->uanon) {
		anon = pg->uanon;
		simple_lock(&anon->an_lock);
		anon->an_ref++;
		simple_unlock(&anon->an_lock);
		simple_unlock(&uvm_loanzero_object.vmobjlock);
		**output = anon;
		(*output)++;
		return (1);
	}

	/*
	 * need to allocate a new anon
	 */

	anon = uvm_analloc();
	if (anon == NULL) {
		/* out of swap causes us to fail */
		simple_unlock(&uvm_loanzero_object.vmobjlock);
		uvmfault_unlockall(ufi, amap, uobj, NULL);
		return (-1);
	}
	anon->an_page = pg;
	pg->uanon = anon;
	uvm_lock_pageq();
	pg->loan_count++;
	uvm_pageactivate(pg);
	uvm_unlock_pageq();
	simple_unlock(&uvm_loanzero_object.vmobjlock);
	**output = anon;
	(*output)++;
	return (1);
}


/*
 * uvm_unloananon: kill loans on anons (basically a normal ref drop)
 *
 * => we expect all our resources to be unlocked
 */

static void
uvm_unloananon(struct vm_anon **aloans, int nanons)
{
	struct vm_anon *anon;

	while (nanons-- > 0) {
		int refs;

		anon = *aloans++;
		simple_lock(&anon->an_lock);
		refs = --anon->an_ref;
		simple_unlock(&anon->an_lock);

		if (refs == 0) {
			uvm_anfree(anon);
		}
	}
}

/*
 * uvm_unloanpage: kill loans on pages loaned out to the kernel
 *
 * => we expect all our resources to be unlocked
 */

static void
uvm_unloanpage(struct vm_page **ploans, int npages)
{
	struct vm_page *pg;
	struct simplelock *slock;

	uvm_lock_pageq();
	while (npages-- > 0) {
		pg = *ploans++;

		/*
		 * do a little dance to acquire the object or anon lock
		 * as appropriate.  we are locking in the wrong order,
		 * so we have to do a try-lock here.
		 */

		slock = NULL;
		while (pg->uobject != NULL || pg->uanon != NULL) {
			if (pg->uobject != NULL) {
				slock = &pg->uobject->vmobjlock;
			} else {
				slock = &pg->uanon->an_lock;
			}
			if (simple_lock_try(slock)) {
				break;
			}
			uvm_unlock_pageq();
			uvm_lock_pageq();
			slock = NULL;
		}

		/*
		 * drop our loan.  if the page is owned by an anon but
		 * PQ_ANON is not set, the page was loaned to the anon
		 * from an object which dropped ownership, so resolve
		 * this by turning the anon's loan into real ownership
		 * (i.e. decrement loan_count again and set PQ_ANON).
		 * after all this, if there are no loans left, put the
		 * page back on a paging queue (if the page is owned by
		 * an anon) or free it (if the page is now unowned).
		 */

		KASSERT(pg->loan_count > 0);
		pg->loan_count--;
		if (pg->uobject == NULL && pg->uanon != NULL &&
		    (pg->pqflags & PQ_ANON) == 0) {
			KASSERT(pg->loan_count > 0);
			pg->loan_count--;
			pg->pqflags |= PQ_ANON;
		}
		if (pg->loan_count == 0) {
			if (pg->uobject == NULL && pg->uanon == NULL) {
				KASSERT((pg->flags & PG_BUSY) == 0);
				uvm_pagefree(pg);
			} else {
				uvm_pageactivate(pg);
			}
		} else if (pg->loan_count == 1 && pg->uobject != NULL &&
			   pg->uanon != NULL) {
			uvm_pageactivate(pg);
		}
		if (slock != NULL) {
			simple_unlock(slock);
		}
	}
	uvm_unlock_pageq();
}

/*
 * uvm_unloan: kill loans on pages or anons.
 */

void
uvm_unloan(void *v, int npages, int flags)
{
	if (flags & UVM_LOAN_TOANON) {
		uvm_unloananon(v, npages);
	} else {
		uvm_unloanpage(v, npages);
	}
}

/*
 * Minimal pager for uvm_loanzero_object.  We need to provide a "put"
 * method, because the page can end up on a paging queue, and the
 * page daemon will want to call pgo_put when it encounters the page
 * on the inactive list.
 */

static int
ulz_put(struct uvm_object *uobj, voff_t start, voff_t stop, int flags)
{
	struct vm_page *pg;

	KDASSERT(uobj == &uvm_loanzero_object);

	/*
	 * Don't need to do any work here if we're not freeing pages.
	 */

	if ((flags & PGO_FREE) == 0) {
		simple_unlock(&uobj->vmobjlock);
		return 0;
	}

	/*
	 * we don't actually want to ever free uvm_loanzero_object's single
	 * zero page, so just reactivate or dequeue it.
	 */

	pg = TAILQ_FIRST(&uobj->memq);
	KASSERT(pg != NULL);
	KASSERT(TAILQ_NEXT(pg, listq) == NULL);

	uvm_lock_pageq();
	if (pg->uanon)
		uvm_pageactivate(pg);
	else
		uvm_pagedequeue(pg);
	uvm_unlock_pageq();

	simple_unlock(&uobj->vmobjlock);
	return 0;
}

static struct uvm_pagerops ulz_pager = {
	NULL,		/* init */
	NULL,		/* reference */
	NULL,		/* detach */
	NULL,		/* fault */
	NULL,		/* get */
	ulz_put,	/* put */
};

/*
 * uvm_loan_init(): initialize the uvm_loan() facility.
 */

void
uvm_loan_init(void)
{

	simple_lock_init(&uvm_loanzero_object.vmobjlock);
	TAILQ_INIT(&uvm_loanzero_object.memq);
	uvm_loanzero_object.pgops = &ulz_pager;

	UVMHIST_INIT(loanhist, 300);
}

/*
 * uvm_loanbreak: break loan on a uobj page
 *
 * => called with uobj locked
 * => the page should be busy
 * => return value:
 *	newly allocated page if succeeded, NULL if a replacement page
 *	could not be allocated
 */
struct vm_page *
uvm_loanbreak(struct vm_page *uobjpage)
{
	struct vm_page *pg;
#ifdef DIAGNOSTIC
	struct uvm_object *uobj = uobjpage->uobject;
#endif

	KASSERT(uobj != NULL);
	LOCK_ASSERT(simple_lock_held(&uobj->vmobjlock));
	KASSERT(uobjpage->flags & PG_BUSY);

	/* alloc new un-owned page */
	pg = uvm_pagealloc(NULL, 0, NULL, 0);
	if (pg == NULL)
		return NULL;

	/*
	 * copy the data from the old page to the new
	 * one and clear the fake/clean flags on the
	 * new page (keep it busy).  force a reload
	 * of the old page by clearing it from all
	 * pmaps.  then lock the page queues to
	 * rename the pages.
	 */

	uvm_pagecopy(uobjpage, pg);	/* old -> new */
	pg->flags &= ~(PG_FAKE|PG_CLEAN);
	pmap_page_protect(uobjpage, VM_PROT_NONE);
	if (uobjpage->flags & PG_WANTED)
		wakeup(uobjpage);
	/* uobj still locked */
	uobjpage->flags &= ~(PG_WANTED|PG_BUSY);
	UVM_PAGE_OWN(uobjpage, NULL);

	uvm_lock_pageq();

	/*
	 * replace uobjpage with the new page.
	 */

	uvm_pagereplace(uobjpage, pg);

	/*
	 * if the page is no longer referenced by
	 * an anon (i.e. we are breaking an O->K
	 * loan), then remove it from any pageq's.
	 */
	if (uobjpage->uanon == NULL)
		uvm_pagedequeue(uobjpage);

	/*
	 * at this point we have absolutely no
	 * control over uobjpage
	 */

	/* install new page */
	uvm_pageactivate(pg);
	uvm_unlock_pageq();

	/*
	 * done!  the loan is broken and "pg" is
	 * PG_BUSY.   it can now replace uobjpage.
	 */

	return pg;
}
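
/*
 * Illustrative sketch (hypothetical, not compiled): how a write-fault
 * path might use uvm_loanbreak().  As documented above, "uobjpage"
 * must be PG_BUSY and its object locked; on success the caller
 * continues with the replacement page, which is PG_BUSY and no longer
 * loaned.  On NULL the caller should unlock, wait for memory and
 * retry the fault.
 */
#if 0
static struct vm_page *
example_break_loan(struct vm_page *uobjpage)
{
	struct vm_page *pg;

	pg = uvm_loanbreak(uobjpage);
	if (pg == NULL) {
		/* out of memory: caller unlocks, uvm_wait()s, refaults */
		return (NULL);
	}

	/* "pg" has replaced "uobjpage" in the object; write to "pg" */
	return (pg);
}
#endif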