/*	$NetBSD: uvm_loan.c,v 1.77 2010/02/03 14:02:49 uebayasi Exp $	*/

/*
 *
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by Charles D. Cranor and
 *      Washington University.
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * from: Id: uvm_loan.c,v 1.1.6.4 1998/02/06 05:08:43 chs Exp
 */

/*
 * uvm_loan.c: page loanout handler
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_loan.c,v 1.77 2010/02/03 14:02:49 uebayasi Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/mman.h>

#include <uvm/uvm.h>

/*
 * "loaned" pages are pages which are (read-only, copy-on-write) loaned
 * from the VM system to other parts of the kernel.  this allows page
 * copying to be avoided (e.g. you can loan pages from objs/anons to
 * the mbuf system).
 *
 * there are 3 types of loans possible:
 *  O->K  uvm_object page to wired kernel page (e.g. mbuf data area)
 *  A->K  anon page to wired kernel page (e.g. mbuf data area)
 *  O->A  uvm_object to anon loan (e.g. vnode page to an anon)
 * note that it is possible to have an O page loaned to both an A and K
 * at the same time.
 *
 * loans are tracked by pg->loan_count.  an O->A page will have both
 * a uvm_object and a vm_anon, but PQ_ANON will not be set.  this sort
 * of page is considered "owned" by the uvm_object (not the anon).
 *
 * each loan of a page to the kernel bumps the pg->wire_count.  the
 * kernel mappings for these pages will be read-only and wired.  since
 * the page will also be wired, it will not be a candidate for pageout,
 * and thus will never be pmap_page_protect()'d with VM_PROT_NONE.  a
 * write fault in the kernel to one of these pages will not cause
 * copy-on-write.  instead, the page fault is considered fatal.  this
 * is because the kernel mapping will have no way to look up the
 * object/anon which the page is owned by.  this is a good side-effect,
 * since a kernel write to a loaned page is an error.
 *
 * owners that want to free their pages and discover that they are
 * loaned out simply "disown" them (the page becomes an orphan).  these
 * pages should be freed when the last loan is dropped.  in some cases
 * an anon may "adopt" an orphaned page.
 *
 * locking: to read pg->loan_count either the owner or the page queues
 * must be locked.  to modify pg->loan_count, both the owner of the page
 * and the PQs must be locked.  pg->flags is (as always) locked by
 * the owner of the page.
 *
 * note that locking from the "loaned" side is tricky since the object
 * getting the loaned page has no reference to the page's owner and thus
 * the owner could "die" at any time.  in order to prevent the owner
 * from dying the page queues should be locked.  this forces us to
 * sometimes use "try" locking.
 *
 * loans are typically broken by the following events:
 *  1. user-level write fault to a loaned page
 *  2. pageout of clean+inactive O->A loaned page
 *  3. owner frees page (e.g. pager flush)
 *
 * note that loaning a page causes all mappings of the page to become
 * read-only (via pmap_page_protect).  this could have an unexpected
 * effect on normal "wired" pages if one is not careful (XXX).
 */
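
/*
 * example (a minimal O->K sketch, not compiled here): loan a few pages
 * from a map into wired kernel pages and later drop the loans.  "map",
 * "start" and the page count are hypothetical; start/len must be
 * multiples of PAGE_SIZE and the map unlocked, per uvm_loan() below.
 *
 *	struct vm_page *pgs[4];
 *	int error;
 *
 *	error = uvm_loan(map, start, 4 * PAGE_SIZE, pgs, UVM_LOAN_TOPAGE);
 *	if (error)
 *		return error;
 *	... read the loaned (read-only!) pages ...
 *	uvm_unloan(pgs, 4, UVM_LOAN_TOPAGE);
 */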

/*
 * local prototypes
 */

static int	uvm_loananon(struct uvm_faultinfo *, void ***,
			     int, struct vm_anon *);
static int	uvm_loanuobj(struct uvm_faultinfo *, void ***,
			     int, vaddr_t);
static int	uvm_loanzero(struct uvm_faultinfo *, void ***, int);
static void	uvm_unloananon(struct vm_anon **, int);
static void	uvm_unloanpage(struct vm_page **, int);
static int	uvm_loanpage(struct vm_page **, int);


/*
 * inlines
 */

/*
 * uvm_loanentry: loan out pages in a map entry (helper fn for uvm_loan())
 *
 * => "ufi" is the result of a successful map lookup (meaning that
 *	on entry the map is locked by the caller)
 * => we may unlock and then relock the map if needed (for I/O)
 * => we put our output result in "output"
 * => we always return with the map unlocked
 * => possible return values:
 *	-1 == error, map is unlocked
 *	 0 == map relock error (try again!), map is unlocked
 *	>0 == number of pages we loaned, map is unlocked
 *
 * NOTE: We can live with this being an inline, because it is only called
 * from one place.
 */

static inline int
uvm_loanentry(struct uvm_faultinfo *ufi, void ***output, int flags)
{
	vaddr_t curaddr = ufi->orig_rvaddr;
	vsize_t togo = ufi->size;
	struct vm_aref *aref = &ufi->entry->aref;
	struct uvm_object *uobj = ufi->entry->object.uvm_obj;
	struct vm_anon *anon;
	int rv, result = 0;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	/*
	 * lock us the rest of the way down (we unlock before return)
	 */
	if (aref->ar_amap)
		amap_lock(aref->ar_amap);

	/*
	 * loop until done
	 */
	while (togo) {

		/*
		 * find the page we want.  check the anon layer first.
		 */

		if (aref->ar_amap) {
			anon = amap_lookup(aref, curaddr - ufi->entry->start);
		} else {
			anon = NULL;
		}

		/* locked: map, amap, uobj */
		if (anon) {
			rv = uvm_loananon(ufi, output, flags, anon);
		} else if (uobj) {
			rv = uvm_loanuobj(ufi, output, flags, curaddr);
		} else if (UVM_ET_ISCOPYONWRITE(ufi->entry)) {
			rv = uvm_loanzero(ufi, output, flags);
		} else {
			uvmfault_unlockall(ufi, aref->ar_amap, uobj, NULL);
			rv = -1;
		}
		/* locked: if (rv > 0) => map, amap, uobj  [o.w. unlocked] */
		KASSERT(rv > 0 || aref->ar_amap == NULL ||
		    !mutex_owned(&aref->ar_amap->am_l));
		KASSERT(rv > 0 || uobj == NULL ||
		    !mutex_owned(&uobj->vmobjlock));

		/* total failure */
		if (rv < 0) {
			UVMHIST_LOG(loanhist, "failure %d", rv, 0,0,0);
			return (-1);
		}

		/* relock failed, need to do another lookup */
		if (rv == 0) {
			UVMHIST_LOG(loanhist, "relock failure %d", result,
			    0,0,0);
			return (result);
		}

		/*
		 * got it... advance to next page
		 */

		result++;
		togo -= PAGE_SIZE;
		curaddr += PAGE_SIZE;
	}

	/*
	 * unlock what we locked, unlock the maps and return
	 */

	if (aref->ar_amap)
		amap_unlock(aref->ar_amap);
	uvmfault_unlockmaps(ufi, false);
	UVMHIST_LOG(loanhist, "done %d", result, 0,0,0);
	return (result);
}

/*
 * normal functions
 */

/*
 * uvm_loan: loan pages in a map out to anons or to the kernel
 *
 * => map should be unlocked
 * => start and len should be multiples of PAGE_SIZE
 * => result is either an array of anon's or vm_pages (depending on flags)
 * => flag values: UVM_LOAN_TOANON - loan to anons
 *		   UVM_LOAN_TOPAGE - loan to wired kernel page
 *    one and only one of these flags must be set!
 * => returns 0 (success), or an appropriate error number
 */

int
uvm_loan(struct vm_map *map, vaddr_t start, vsize_t len, void *v, int flags)
{
	struct uvm_faultinfo ufi;
	void **result, **output;
	int rv, error;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	/*
	 * ensure that one and only one of the flags is set
	 */

	KASSERT(((flags & UVM_LOAN_TOANON) == 0) ^
		((flags & UVM_LOAN_TOPAGE) == 0));
	KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);

	/*
	 * "output" is a pointer to the current place to put the loaned page.
	 */

	result = v;
	output = &result[0];	/* start at the beginning ... */

	/*
	 * while we've got pages to do
	 */

	while (len > 0) {

		/*
		 * fill in params for a call to uvmfault_lookup
		 */

		ufi.orig_map = map;
		ufi.orig_rvaddr = start;
		ufi.orig_size = len;

		/*
		 * do the lookup, the only time this will fail is if we hit on
		 * an unmapped region (an error)
		 */

		if (!uvmfault_lookup(&ufi, false)) {
			error = ENOENT;
			goto fail;
		}

		/*
		 * map now locked.  now do the loanout...
		 */

		rv = uvm_loanentry(&ufi, &output, flags);
		if (rv < 0) {
			/* all unlocked due to error */
			error = EINVAL;
			goto fail;
		}

		/*
		 * done!  the map is unlocked.  advance, if possible.
		 *
		 * XXXCDC: could be recoded to hold the map lock with
		 * smarter code (but it only happens on map entry
		 * boundaries, so it isn't that bad).
		 */

		if (rv) {
			rv <<= PAGE_SHIFT;
			len -= rv;
			start += rv;
		}
	}
	UVMHIST_LOG(loanhist, "success", 0,0,0,0);
	return 0;

fail:
	/*
	 * failed to complete loans.  drop any loans and return failure code.
	 * map is already unlocked.
	 */

	if (output - result) {
		if (flags & UVM_LOAN_TOANON) {
			uvm_unloananon((struct vm_anon **)result,
			    output - result);
		} else {
			uvm_unloanpage((struct vm_page **)result,
			    output - result);
		}
	}
	UVMHIST_LOG(loanhist, "error %d", error,0,0,0);
	return (error);
}

/*
 * uvm_loananon: loan a page from an anon out
 *
 * => called with map, amap, uobj locked
 * => return value:
 *	-1 = fatal error, everything is unlocked, abort.
 *	 0 = lookup in ufi went stale, everything unlocked, relookup and
 *		try again
 *	 1 = got it, everything still locked
 */

int
uvm_loananon(struct uvm_faultinfo *ufi, void ***output, int flags,
    struct vm_anon *anon)
{
	struct vm_page *pg;
	int error;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	/*
	 * if we are loaning to "another" anon then it is easy, we just
	 * bump the reference count on the current anon and return a
	 * pointer to it (it becomes copy-on-write shared).
	 */

	if (flags & UVM_LOAN_TOANON) {
		mutex_enter(&anon->an_lock);
		pg = anon->an_page;
		if (pg && (pg->pqflags & PQ_ANON) != 0 && anon->an_ref == 1) {
			if (pg->wire_count > 0) {
				UVMHIST_LOG(loanhist, "->A wired %p", pg,0,0,0);
				uvmfault_unlockall(ufi,
				    ufi->entry->aref.ar_amap,
				    ufi->entry->object.uvm_obj, anon);
				return (-1);
			}
			pmap_page_protect(pg, VM_PROT_READ);
		}
		anon->an_ref++;
		**output = anon;
		(*output)++;
		mutex_exit(&anon->an_lock);
		UVMHIST_LOG(loanhist, "->A done", 0,0,0,0);
		return (1);
	}

	/*
	 * we are loaning to a kernel-page.  we need to get the page
	 * resident so we can wire it.  uvmfault_anonget will handle
	 * this for us.
	 */

	mutex_enter(&anon->an_lock);
	error = uvmfault_anonget(ufi, ufi->entry->aref.ar_amap, anon);

	/*
	 * if we were unable to get the anon, then uvmfault_anonget has
	 * unlocked everything and returned an error code.
	 */

	if (error) {
		UVMHIST_LOG(loanhist, "error %d", error,0,0,0);

		/* need to refault (i.e. refresh our lookup) ? */
		if (error == ERESTART) {
			return (0);
		}

		/* "try again"?  sleep a bit and retry ... */
		if (error == EAGAIN) {
			kpause("loanagain", false, hz/2, NULL);
			return (0);
		}

		/* otherwise flag it as an error */
		return (-1);
	}

	/*
	 * we have the page and its owner locked: do the loan now.
	 */

	pg = anon->an_page;
	mutex_enter(&uvm_pageqlock);
	if (pg->wire_count > 0) {
		mutex_exit(&uvm_pageqlock);
		UVMHIST_LOG(loanhist, "->K wired %p", pg,0,0,0);
		KASSERT(pg->uobject == NULL);
		uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap,
		    NULL, anon);
		return (-1);
	}
	if (pg->loan_count == 0) {
		pmap_page_protect(pg, VM_PROT_READ);
	}
	pg->loan_count++;
	uvm_pageactivate(pg);
	mutex_exit(&uvm_pageqlock);
	**output = pg;
	(*output)++;

	/* unlock anon and return success */
	if (pg->uobject)
		mutex_exit(&pg->uobject->vmobjlock);
	mutex_exit(&anon->an_lock);
	UVMHIST_LOG(loanhist, "->K done", 0,0,0,0);
	return (1);
}

/*
 * uvm_loanpage: loan out pages to kernel (->K)
 *
 * => pages should be object-owned and the object should be locked.
 * => in the case of error, the object might be unlocked and relocked.
 * => caller should busy the pages beforehand.
 * => pages will be unbusied.
 * => fail with EBUSY if we meet a wired page.
 */
static int
uvm_loanpage(struct vm_page **pgpp, int npages)
{
	int i;
	int error = 0;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	for (i = 0; i < npages; i++) {
		struct vm_page *pg = pgpp[i];

		KASSERT(pg->uobject != NULL);
		KASSERT(pg->uobject == pgpp[0]->uobject);
		KASSERT(!(pg->flags & (PG_RELEASED|PG_PAGEOUT)));
		KASSERT(mutex_owned(&pg->uobject->vmobjlock));
		KASSERT(pg->flags & PG_BUSY);

		mutex_enter(&uvm_pageqlock);
		if (pg->wire_count > 0) {
			mutex_exit(&uvm_pageqlock);
			UVMHIST_LOG(loanhist, "wired %p", pg,0,0,0);
			error = EBUSY;
			break;
		}
		if (pg->loan_count == 0) {
			pmap_page_protect(pg, VM_PROT_READ);
		}
		pg->loan_count++;
		uvm_pageactivate(pg);
		mutex_exit(&uvm_pageqlock);
	}

	uvm_page_unbusy(pgpp, npages);

	if (error) {
		/*
		 * backout what we've done
		 */
		kmutex_t *slock = &pgpp[0]->uobject->vmobjlock;

		mutex_exit(slock);
		uvm_unloan(pgpp, i, UVM_LOAN_TOPAGE);
		mutex_enter(slock);
	}

	UVMHIST_LOG(loanhist, "done %d", error,0,0,0);
	return error;
}
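
/*
 * example (sketch only, assuming all pages belong to "uobj"): the
 * busy-then-loan pattern expected by uvm_loanpage(), as used by
 * uvm_loanuobjpages() below.  pgo_get(..., PGO_SYNCIO) returns the
 * pages PG_BUSY with the object unlocked, so it must be relocked
 * before loaning.
 *
 *	mutex_enter(&uobj->vmobjlock);
 *	error = (*uobj->pgops->pgo_get)(uobj, off, pgpp, &npages, 0,
 *	    VM_PROT_READ, 0, PGO_SYNCIO);
 *	if (error == 0) {
 *		mutex_enter(&uobj->vmobjlock);
 *		error = uvm_loanpage(pgpp, npages);	(unbusies the pages)
 *		mutex_exit(&uobj->vmobjlock);
 *	}
 */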

/*
 * XXX UBC temp limit
 * number of pages to get at once.
 * should be <= MAX_READ_AHEAD in genfs_vnops.c
 */
#define	UVM_LOAN_GET_CHUNK	16

/*
 * uvm_loanuobjpages: loan pages from a uobj out (O->K)
 *
 * => uobj shouldn't be locked.  (we'll lock it)
 * => fail with EBUSY if we meet a wired page.
 */
int
uvm_loanuobjpages(struct uvm_object *uobj, voff_t pgoff, int orignpages,
    struct vm_page **origpgpp)
{
	int ndone;	/* # of pages loaned out */
	struct vm_page **pgpp;
	int error;
	int i;
	kmutex_t *slock;

	pgpp = origpgpp;
	for (ndone = 0; ndone < orignpages; ) {
		int npages;
		/* npendloan: # of pages busied but not loaned out yet. */
		int npendloan = 0xdead; /* XXX gcc */
reget:
		npages = MIN(UVM_LOAN_GET_CHUNK, orignpages - ndone);
		mutex_enter(&uobj->vmobjlock);
		error = (*uobj->pgops->pgo_get)(uobj,
		    pgoff + (ndone << PAGE_SHIFT), pgpp, &npages, 0,
		    VM_PROT_READ, 0, PGO_SYNCIO);
		if (error == EAGAIN) {
			kpause("loanuopg", false, hz/2, NULL);
			continue;
		}
		if (error)
			goto fail;

		KASSERT(npages > 0);

		/* loan and unbusy pages */
		slock = NULL;
		for (i = 0; i < npages; i++) {
			kmutex_t *nextslock;	/* slock for next page */
			struct vm_page *pg = *pgpp;

			/* XXX assuming that the page is owned by uobj */
			KASSERT(pg->uobject != NULL);
			nextslock = &pg->uobject->vmobjlock;

			if (slock != nextslock) {
				if (slock) {
					KASSERT(npendloan > 0);
					error = uvm_loanpage(pgpp - npendloan,
					    npendloan);
					mutex_exit(slock);
					if (error)
						goto fail;
					ndone += npendloan;
					KASSERT(origpgpp + ndone == pgpp);
				}
				slock = nextslock;
				npendloan = 0;
				mutex_enter(slock);
			}

			if ((pg->flags & PG_RELEASED) != 0) {
				/*
				 * release pages and try again.
				 */
				mutex_exit(slock);
				for (; i < npages; i++) {
					pg = pgpp[i];
					slock = &pg->uobject->vmobjlock;

					mutex_enter(slock);
					mutex_enter(&uvm_pageqlock);
					uvm_page_unbusy(&pg, 1);
					mutex_exit(&uvm_pageqlock);
					mutex_exit(slock);
				}
				goto reget;
			}

			npendloan++;
			pgpp++;
			KASSERT(origpgpp + ndone + npendloan == pgpp);
		}
		KASSERT(slock != NULL);
		KASSERT(npendloan > 0);
		error = uvm_loanpage(pgpp - npendloan, npendloan);
		mutex_exit(slock);
		if (error)
			goto fail;
		ndone += npendloan;
		KASSERT(origpgpp + ndone == pgpp);
	}

	return 0;

fail:
	uvm_unloan(origpgpp, ndone, UVM_LOAN_TOPAGE);

	return error;
}
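
/*
 * example (hypothetical caller, sketch only): wire the first 8 pages
 * of a uobj read-only into the kernel and later return them; "uobj"
 * and the surrounding error handling are assumptions.
 *
 *	struct vm_page *pgs[8];
 *	int error;
 *
 *	error = uvm_loanuobjpages(uobj, 0, 8, pgs);
 *	if (error)
 *		return error;		(EBUSY if a wired page was met)
 *	... use the pages ...
 *	uvm_unloan(pgs, 8, UVM_LOAN_TOPAGE);
 */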

/*
 * uvm_loanuobj: loan a page from a uobj out
 *
 * => called with map, amap, uobj locked
 * => return value:
 *	-1 = fatal error, everything is unlocked, abort.
 *	 0 = lookup in ufi went stale, everything unlocked, relookup and
 *		try again
 *	 1 = got it, everything still locked
 */

static int
uvm_loanuobj(struct uvm_faultinfo *ufi, void ***output, int flags, vaddr_t va)
{
	struct vm_amap *amap = ufi->entry->aref.ar_amap;
	struct uvm_object *uobj = ufi->entry->object.uvm_obj;
	struct vm_page *pg;
	struct vm_anon *anon;
	int error, npages;
	bool locked;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);

	/*
	 * first we must make sure the page is resident.
	 *
	 * XXXCDC: duplicate code with uvm_fault().
	 */

	/* locked: maps(read), amap(if there) */
	mutex_enter(&uobj->vmobjlock);
	/* locked: maps(read), amap(if there), uobj */

	if (uobj->pgops->pgo_get) {	/* try locked pgo_get */
		npages = 1;
		pg = NULL;
		error = (*uobj->pgops->pgo_get)(uobj,
		    va - ufi->entry->start + ufi->entry->offset,
		    &pg, &npages, 0, VM_PROT_READ, MADV_NORMAL, PGO_LOCKED);
	} else {
		error = EIO;		/* must have pgo_get op */
	}

	/*
	 * check the result of the locked pgo_get.  if there is a problem,
	 * then we fail the loan.
	 */

	if (error && error != EBUSY) {
		uvmfault_unlockall(ufi, amap, uobj, NULL);
		return (-1);
	}

	/*
	 * if we need to unlock for I/O, do so now.
	 */

	if (error == EBUSY) {
		uvmfault_unlockall(ufi, amap, NULL, NULL);

		/* locked: uobj */
		npages = 1;
		error = (*uobj->pgops->pgo_get)(uobj,
		    va - ufi->entry->start + ufi->entry->offset,
		    &pg, &npages, 0, VM_PROT_READ, MADV_NORMAL, PGO_SYNCIO);
		/* locked: <nothing> */

		if (error) {
			if (error == EAGAIN) {
				kpause("fltagain2", false, hz/2, NULL);
				return (0);
			}
			return (-1);
		}

		/*
		 * pgo_get was a success.  attempt to relock everything.
		 */

		locked = uvmfault_relock(ufi);
		if (locked && amap)
			amap_lock(amap);
		uobj = pg->uobject;
		mutex_enter(&uobj->vmobjlock);

		/*
		 * verify that the page has not been released and re-verify
		 * that the amap slot is still free.  if there is a problem,
		 * we drop our lock (thus forcing a lookup refresh/retry).
		 */

		if ((pg->flags & PG_RELEASED) != 0 ||
		    (locked && amap && amap_lookup(&ufi->entry->aref,
		    ufi->orig_rvaddr - ufi->entry->start))) {
			if (locked)
				uvmfault_unlockall(ufi, amap, NULL, NULL);
			locked = false;
		}

		/*
		 * didn't get the lock?  release the page and retry.
		 */

		if (locked == false) {
			if (pg->flags & PG_WANTED) {
				wakeup(pg);
			}
			if (pg->flags & PG_RELEASED) {
				mutex_enter(&uvm_pageqlock);
				uvm_pagefree(pg);
				mutex_exit(&uvm_pageqlock);
				mutex_exit(&uobj->vmobjlock);
				return (0);
			}
			mutex_enter(&uvm_pageqlock);
			uvm_pageactivate(pg);
			mutex_exit(&uvm_pageqlock);
			pg->flags &= ~(PG_BUSY|PG_WANTED);
			UVM_PAGE_OWN(pg, NULL);
			mutex_exit(&uobj->vmobjlock);
			return (0);
		}
	}

	KASSERT(uobj == pg->uobject);

	/*
	 * at this point we have the page we want ("pg") marked PG_BUSY for us
	 * and we have all data structures locked.  do the loanout.  page can
	 * not be PG_RELEASED (we caught this above).
	 */

	if ((flags & UVM_LOAN_TOANON) == 0) {
		if (uvm_loanpage(&pg, 1)) {
			uvmfault_unlockall(ufi, amap, uobj, NULL);
			return (-1);
		}
		mutex_exit(&uobj->vmobjlock);
		**output = pg;
		(*output)++;
		return (1);
	}

	/*
	 * must be a loan to an anon.  check to see if there is already
	 * an anon associated with this page.  if so, then just return
	 * a reference to this object.  the page should already be
	 * mapped read-only because it is already on loan.
	 */

	if (pg->uanon) {
		anon = pg->uanon;
		mutex_enter(&anon->an_lock);
		anon->an_ref++;
		mutex_exit(&anon->an_lock);
		if (pg->flags & PG_WANTED) {
			wakeup(pg);
		}
		pg->flags &= ~(PG_WANTED|PG_BUSY);
		UVM_PAGE_OWN(pg, NULL);
		mutex_exit(&uobj->vmobjlock);
		**output = anon;
		(*output)++;
		return (1);
	}

	/*
	 * need to allocate a new anon
	 */

	anon = uvm_analloc();
	if (anon == NULL) {
		goto fail;
	}
	anon->an_page = pg;
	pg->uanon = anon;
	mutex_enter(&uvm_pageqlock);
	if (pg->wire_count > 0) {
		mutex_exit(&uvm_pageqlock);
		UVMHIST_LOG(loanhist, "wired %p", pg,0,0,0);
		pg->uanon = NULL;
		anon->an_page = NULL;
		anon->an_ref--;
		mutex_exit(&anon->an_lock);
		uvm_anfree(anon);
		goto fail;
	}
	if (pg->loan_count == 0) {
		pmap_page_protect(pg, VM_PROT_READ);
	}
	pg->loan_count++;
	uvm_pageactivate(pg);
	mutex_exit(&uvm_pageqlock);
	if (pg->flags & PG_WANTED) {
		wakeup(pg);
	}
	pg->flags &= ~(PG_WANTED|PG_BUSY);
	UVM_PAGE_OWN(pg, NULL);
	mutex_exit(&uobj->vmobjlock);
	mutex_exit(&anon->an_lock);
	**output = anon;
	(*output)++;
	return (1);

fail:
	UVMHIST_LOG(loanhist, "fail", 0,0,0,0);
	/*
	 * unlock everything and bail out.
	 */
	if (pg->flags & PG_WANTED) {
		wakeup(pg);
	}
	pg->flags &= ~(PG_WANTED|PG_BUSY);
	UVM_PAGE_OWN(pg, NULL);
	uvmfault_unlockall(ufi, amap, uobj, NULL);
	return (-1);
}

/*
 * uvm_loanzero: loan a zero-fill page out
 *
 * => called with map, amap, uobj locked
 * => return value:
 *	-1 = fatal error, everything is unlocked, abort.
 *	 0 = lookup in ufi went stale, everything unlocked, relookup and
 *		try again
 *	 1 = got it, everything still locked
 */

static struct uvm_object uvm_loanzero_object;

static int
uvm_loanzero(struct uvm_faultinfo *ufi, void ***output, int flags)
{
	struct vm_anon *anon;
	struct vm_page *pg;
	struct vm_amap *amap = ufi->entry->aref.ar_amap;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(loanhist);
again:
	mutex_enter(&uvm_loanzero_object.vmobjlock);

	/*
	 * first, get ahold of our single zero page.
	 */

	if (__predict_false((pg =
	    TAILQ_FIRST(&uvm_loanzero_object.memq)) == NULL)) {
		while ((pg = uvm_pagealloc(&uvm_loanzero_object, 0, NULL,
		    UVM_PGA_ZERO)) == NULL) {
			mutex_exit(&uvm_loanzero_object.vmobjlock);
			uvmfault_unlockall(ufi, amap, NULL, NULL);
			uvm_wait("loanzero");
			if (!uvmfault_relock(ufi)) {
				return (0);
			}
			if (amap) {
				amap_lock(amap);
			}
			goto again;
		}

		/* got a zero'd page. */
		pg->flags &= ~(PG_WANTED|PG_BUSY|PG_FAKE);
		pg->flags |= PG_RDONLY;
		mutex_enter(&uvm_pageqlock);
		uvm_pageactivate(pg);
		mutex_exit(&uvm_pageqlock);
		UVM_PAGE_OWN(pg, NULL);
	}

	if ((flags & UVM_LOAN_TOANON) == 0) {	/* loaning to kernel-page */
		mutex_enter(&uvm_pageqlock);
		pg->loan_count++;
		mutex_exit(&uvm_pageqlock);
		mutex_exit(&uvm_loanzero_object.vmobjlock);
		**output = pg;
		(*output)++;
		return (1);
	}

	/*
	 * loaning to an anon.  check to see if there is already an anon
	 * associated with this page.  if so, then just return a reference
	 * to this object.
	 */

	if (pg->uanon) {
		anon = pg->uanon;
		mutex_enter(&anon->an_lock);
		anon->an_ref++;
		mutex_exit(&anon->an_lock);
		mutex_exit(&uvm_loanzero_object.vmobjlock);
		**output = anon;
		(*output)++;
		return (1);
	}

	/*
	 * need to allocate a new anon
	 */

	anon = uvm_analloc();
	if (anon == NULL) {
		/* out of swap causes us to fail */
		mutex_exit(&uvm_loanzero_object.vmobjlock);
		uvmfault_unlockall(ufi, amap, NULL, NULL);
		return (-1);
	}
	anon->an_page = pg;
	pg->uanon = anon;
	mutex_enter(&uvm_pageqlock);
	pg->loan_count++;
	uvm_pageactivate(pg);
	mutex_exit(&uvm_pageqlock);
	mutex_exit(&anon->an_lock);
	mutex_exit(&uvm_loanzero_object.vmobjlock);
	**output = anon;
	(*output)++;
	return (1);
}


/*
 * uvm_unloananon: kill loans on anons (basically a normal ref drop)
 *
 * => we expect all our resources to be unlocked
 */

static void
uvm_unloananon(struct vm_anon **aloans, int nanons)
{
	struct vm_anon *anon;

	while (nanons-- > 0) {
		int refs;

		anon = *aloans++;
		mutex_enter(&anon->an_lock);
		refs = --anon->an_ref;
		mutex_exit(&anon->an_lock);

		if (refs == 0) {
			uvm_anfree(anon);
		}
	}
}

/*
 * uvm_unloanpage: kill loans on pages loaned out to the kernel
 *
 * => we expect all our resources to be unlocked
 */

static void
uvm_unloanpage(struct vm_page **ploans, int npages)
{
	struct vm_page *pg;
	kmutex_t *slock;

	mutex_enter(&uvm_pageqlock);
	while (npages-- > 0) {
		pg = *ploans++;

		/*
		 * do a little dance to acquire the object or anon lock
		 * as appropriate.  we are locking in the wrong order,
		 * so we have to do a try-lock here.
		 */

		slock = NULL;
		while (pg->uobject != NULL || pg->uanon != NULL) {
			if (pg->uobject != NULL) {
				slock = &pg->uobject->vmobjlock;
			} else {
				slock = &pg->uanon->an_lock;
			}
			if (mutex_tryenter(slock)) {
				break;
			}
			mutex_exit(&uvm_pageqlock);
			/* XXX Better than yielding but inadequate. */
			kpause("livelock", false, 1, NULL);
			mutex_enter(&uvm_pageqlock);
			slock = NULL;
		}

		/*
		 * drop our loan.  if page is owned by an anon but
		 * PQ_ANON is not set, the page was loaned to the anon
		 * from an object which dropped ownership, so resolve
		 * this by turning the anon's loan into real ownership
		 * (ie. decrement loan_count again and set PQ_ANON).
		 * after all this, if there are no loans left, put the
		 * page back on a paging queue (if the page is owned by
		 * an anon) or free it (if the page is now unowned).
		 */

		KASSERT(pg->loan_count > 0);
		pg->loan_count--;
		if (pg->uobject == NULL && pg->uanon != NULL &&
		    (pg->pqflags & PQ_ANON) == 0) {
			KASSERT(pg->loan_count > 0);
			pg->loan_count--;
			pg->pqflags |= PQ_ANON;
		}
		if (pg->loan_count == 0 && pg->uobject == NULL &&
		    pg->uanon == NULL) {
			KASSERT((pg->flags & PG_BUSY) == 0);
			uvm_pagefree(pg);
		}
		if (slock != NULL) {
			mutex_exit(slock);
		}
	}
	mutex_exit(&uvm_pageqlock);
}

/*
 * uvm_unloan: kill loans on pages or anons.
 */

void
uvm_unloan(void *v, int npages, int flags)
{
	if (flags & UVM_LOAN_TOANON) {
		uvm_unloananon(v, npages);
	} else {
		uvm_unloanpage(v, npages);
	}
}

/*
 * Minimal pager for uvm_loanzero_object.  We need to provide a "put"
 * method, because the page can end up on a paging queue, and the
 * page daemon will want to call pgo_put when it encounters the page
 * on the inactive list.
 */

static int
ulz_put(struct uvm_object *uobj, voff_t start, voff_t stop, int flags)
{
	struct vm_page *pg;

	KDASSERT(uobj == &uvm_loanzero_object);

	/*
	 * Don't need to do any work here if we're not freeing pages.
	 */

	if ((flags & PGO_FREE) == 0) {
		mutex_exit(&uobj->vmobjlock);
		return 0;
	}

	/*
	 * we don't actually want to ever free the uvm_loanzero_page, so
	 * just reactivate or dequeue it.
	 */

	pg = TAILQ_FIRST(&uobj->memq);
	KASSERT(pg != NULL);
	KASSERT(TAILQ_NEXT(pg, listq.queue) == NULL);

	mutex_enter(&uvm_pageqlock);
	if (pg->uanon)
		uvm_pageactivate(pg);
	else
		uvm_pagedequeue(pg);
	mutex_exit(&uvm_pageqlock);

	mutex_exit(&uobj->vmobjlock);
	return 0;
}

static const struct uvm_pagerops ulz_pager = {
	.pgo_put = ulz_put,
};

/*
 * uvm_loan_init(): initialize the uvm_loan() facility.
 */

void
uvm_loan_init(void)
{

	UVM_OBJ_INIT(&uvm_loanzero_object, &ulz_pager, 0);

	UVMHIST_INIT(loanhist, 300);
}

/*
 * uvm_loanbreak: break loan on a uobj page
 *
 * => called with uobj locked
 * => the page should be busy
 * => return value:
 *	the newly allocated page on success, or NULL on failure
 */
struct vm_page *
uvm_loanbreak(struct vm_page *uobjpage)
{
	struct vm_page *pg;
#ifdef DIAGNOSTIC
	struct uvm_object *uobj = uobjpage->uobject;
#endif

	KASSERT(uobj != NULL);
	KASSERT(mutex_owned(&uobj->vmobjlock));
	KASSERT(uobjpage->flags & PG_BUSY);

	/* alloc new un-owned page */
	pg = uvm_pagealloc(NULL, 0, NULL, 0);
	if (pg == NULL)
		return NULL;

	/*
	 * copy the data from the old page to the new
	 * one and clear the fake flags on the new page (keep it busy).
	 * force a reload of the old page by clearing it from all
	 * pmaps.
	 * transfer dirtiness of the old page to the new page.
	 * then lock the page queues to rename the pages.
	 */

	uvm_pagecopy(uobjpage, pg);	/* old -> new */
	pg->flags &= ~PG_FAKE;
	pmap_page_protect(uobjpage, VM_PROT_NONE);
	if ((uobjpage->flags & PG_CLEAN) != 0 && !pmap_clear_modify(uobjpage)) {
		pmap_clear_modify(pg);
		pg->flags |= PG_CLEAN;
	} else {
		/* uvm_pagecopy marked it dirty */
		KASSERT((pg->flags & PG_CLEAN) == 0);
		/* an object with a dirty page should be dirty. */
		KASSERT(!UVM_OBJ_IS_CLEAN(uobj));
	}
	if (uobjpage->flags & PG_WANTED)
		wakeup(uobjpage);
	/* uobj still locked */
	uobjpage->flags &= ~(PG_WANTED|PG_BUSY);
	UVM_PAGE_OWN(uobjpage, NULL);

	mutex_enter(&uvm_pageqlock);

	/*
	 * replace uobjpage with new page.
	 */

	uvm_pagereplace(uobjpage, pg);

	/*
	 * if the page is no longer referenced by
	 * an anon (i.e. we are breaking an O->K
	 * loan), then remove it from any pageq's.
	 */
	if (uobjpage->uanon == NULL)
		uvm_pagedequeue(uobjpage);

	/*
	 * at this point we have absolutely no
	 * control over uobjpage
	 */

	/* install new page */
	uvm_pageactivate(pg);
	mutex_exit(&uvm_pageqlock);

	/*
	 * done!  loan is broken and "pg" is
	 * PG_BUSY.  it can now replace uobjpage.
	 */

	return pg;
}
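
/*
 * example (sketch of a caller, not taken from this file): a fault
 * handler that must write to a loaned uobj page breaks the loan
 * first.  "uobjpage" is PG_BUSY and its object is locked, as the
 * preconditions above require.
 *
 *	if (uobjpage->loan_count) {
 *		pg = uvm_loanbreak(uobjpage);
 *		if (pg == NULL) {
 *			(unlock everything, uvm_wait(), then retry fault)
 *		}
 *		uobjpage = pg;	(pg is PG_BUSY and replaces the old page)
 *	}
 */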

int
uvm_loanbreak_anon(struct vm_anon *anon, struct uvm_object *uobj)
{
	struct vm_page *pg;

	KASSERT(mutex_owned(&anon->an_lock));
	KASSERT(uobj == NULL || mutex_owned(&uobj->vmobjlock));

	/* get new un-owned replacement page */
	pg = uvm_pagealloc(NULL, 0, NULL, 0);
	if (pg == NULL) {
		return ENOMEM;
	}

	/*
	 * copy data, kill loan, and drop uobj lock (if any)
	 */
	/* copy old -> new */
	uvm_pagecopy(anon->an_page, pg);

	/* force reload */
	pmap_page_protect(anon->an_page, VM_PROT_NONE);
	mutex_enter(&uvm_pageqlock);	  /* KILL loan */

	anon->an_page->uanon = NULL;
	/* in case we owned */
	anon->an_page->pqflags &= ~PQ_ANON;

	if (uobj) {
		/* if we were receiver of loan */
		anon->an_page->loan_count--;
	} else {
		/*
		 * we were the lender (A->K); need to remove the page from
		 * pageq's.
		 */
		uvm_pagedequeue(anon->an_page);
	}

	if (uobj) {
		mutex_exit(&uobj->vmobjlock);
	}

	/* install new page in anon */
	anon->an_page = pg;
	pg->uanon = anon;
	pg->pqflags |= PQ_ANON;

	uvm_pageactivate(pg);
	mutex_exit(&uvm_pageqlock);

	pg->flags &= ~(PG_BUSY|PG_FAKE);
	UVM_PAGE_OWN(pg, NULL);

	/* done! */

	return 0;
}