/*	$NetBSD: uvm_amap.c,v 1.126 2021/03/13 15:29:55 skrll Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * uvm_amap.c: amap operations
 */

/*
 * this file contains functions that perform operations on amaps.  see
 * uvm_amap.h for a brief explanation of the role of amaps in uvm.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_amap.c,v 1.126 2021/03/13 15:29:55 skrll Exp $");

#include "opt_uvmhist.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/kmem.h>
#include <sys/pool.h>
#include <sys/atomic.h>

#include <uvm/uvm.h>
#include <uvm/uvm_swap.h>

/*
 * cache for allocation of vm_amap structures.  note that in order to
 * avoid an endless loop, the amap cache's allocator cannot allocate
 * memory from an amap (it currently goes through the kernel uobj, so
 * we are ok).
 */
static struct pool_cache uvm_amap_cache;
static kmutex_t amap_list_lock __cacheline_aligned;
static LIST_HEAD(, vm_amap) amap_list;

/*
 * local functions
 */

static int
amap_roundup_slots(int slots)
{

	return kmem_roundup_size(slots * sizeof(int)) / sizeof(int);
}

#ifdef UVM_AMAP_PPREF
/*
 * what is ppref?   ppref is an _optional_ amap feature which is used
 * to keep track of reference counts on a per-page basis.  it is enabled
 * when UVM_AMAP_PPREF is defined.
 *
 * when enabled, an array of ints is allocated for the pprefs.  this
 * array is allocated only when a partial reference is added to the
 * map (either by unmapping part of the amap, or gaining a reference
 * to only a part of an amap).  if the allocation of the array fails
 * (KM_NOSLEEP), then we set the array pointer to PPREF_NONE to indicate
 * that we tried to do ppref's but couldn't alloc the array so just
 * give up (after all, this is an optional feature!).
 *
 * the array is divided into page sized "chunks."   for chunks of length 1,
 * the chunk reference count plus one is stored in that chunk's slot.
 * for chunks of length > 1 the first slot contains (the reference count
 * plus one) * -1.    [the negative value indicates that the length is
 * greater than one.]   the second slot of the chunk contains the length
 * of the chunk.   here is an example:
 *
 * actual REFS:  2  2  2  2  3  1  1  0  0  0  4  4  0  1  1  1
 *      ppref:  -3  4  x  x  4 -2  2 -1  3  x -5  2  1 -2  3  x
 *              <----------><-><----><-------><----><-><------->
 * (x = don't care)
 *
 * this allows us to allow one int to contain the ref count for the whole
 * chunk.    note that the "plus one" part is needed because a reference
 * count of zero is neither positive nor negative (need a way to tell
 * if we've got one zero or a bunch of them).
 *
 * here are some in-line functions to help us.
 */

/*
 * pp_getreflen: get the reference and length for a specific offset
 *
 * => ppref's amap must be locked
 */
static inline void
pp_getreflen(int *ppref, int offset, int *refp, int *lenp)
{

	if (ppref[offset] > 0) {		/* chunk size must be 1 */
		*refp = ppref[offset] - 1;	/* don't forget to adjust */
		*lenp = 1;
	} else {
		*refp = (ppref[offset] * -1) - 1;
		*lenp = ppref[offset+1];
	}
}

/*
 * pp_setreflen: set the reference and length for a specific offset
 *
 * => ppref's amap must be locked
 */
static inline void
pp_setreflen(int *ppref, int offset, int ref, int len)
{
	if (len == 0)
		return;
	if (len == 1) {
		ppref[offset] = ref + 1;
	} else {
		ppref[offset] = (ref + 1) * -1;
		ppref[offset+1] = len;
	}
}
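
/*
 * A worked example of the encoding above: the first chunk in the table
 * (four slots with a reference count of 2) is stored by
 * pp_setreflen(ppref, 0, 2, 4) as ppref[0] = -(2 + 1) = -3 and
 * ppref[1] = 4; pp_getreflen(ppref, 0, &ref, &len) then recovers
 * ref = 2, len = 4.  A length-1 chunk with a reference count of 3 is
 * stored simply as ppref[offset] = 3 + 1 = 4.
 */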
#endif /* UVM_AMAP_PPREF */

/*
 * amap_alloc1: allocate an amap, but do not initialise the overlay.
 *
 * => Note: lock is not set.
 */
static struct vm_amap *
amap_alloc1(int slots, int padslots, int flags)
{
	const bool nowait = (flags & UVM_FLAG_NOWAIT) != 0;
	const km_flag_t kmflags = nowait ? KM_NOSLEEP : KM_SLEEP;
	struct vm_amap *amap;
	krwlock_t *newlock, *oldlock;
	int totalslots;

	amap = pool_cache_get(&uvm_amap_cache, nowait ? PR_NOWAIT : PR_WAITOK);
	if (amap == NULL) {
		return NULL;
	}
	KASSERT(amap->am_lock != NULL);
	KASSERT(amap->am_nused == 0);

	/* Try to privatize the lock if currently shared. */
	if (rw_obj_refcnt(amap->am_lock) > 1) {
		newlock = rw_obj_tryalloc();
		if (newlock != NULL) {
			oldlock = amap->am_lock;
			mutex_enter(&amap_list_lock);
			amap->am_lock = newlock;
			mutex_exit(&amap_list_lock);
			rw_obj_free(oldlock);
		}
	}

	totalslots = amap_roundup_slots(slots + padslots);
	amap->am_ref = 1;
	amap->am_flags = 0;
#ifdef UVM_AMAP_PPREF
	amap->am_ppref = NULL;
#endif
	amap->am_maxslot = totalslots;
	amap->am_nslot = slots;

	/*
	 * Note: since allocations are likely big, we expect to reduce the
	 * memory fragmentation by allocating them in separate blocks.
	 */
	amap->am_slots = kmem_alloc(totalslots * sizeof(int), kmflags);
	if (amap->am_slots == NULL)
		goto fail1;

	amap->am_bckptr = kmem_alloc(totalslots * sizeof(int), kmflags);
	if (amap->am_bckptr == NULL)
		goto fail2;

	amap->am_anon = kmem_alloc(totalslots * sizeof(struct vm_anon *),
	    kmflags);
	if (amap->am_anon == NULL)
		goto fail3;

	return amap;

fail3:
	kmem_free(amap->am_bckptr, totalslots * sizeof(int));
fail2:
	kmem_free(amap->am_slots, totalslots * sizeof(int));
fail1:
	pool_cache_put(&uvm_amap_cache, amap);

	/*
	 * XXX hack to tell the pagedaemon how many pages we need,
	 * since we may need more than it would normally free.
	 */
	if (nowait) {
		extern u_int uvm_extrapages;
		atomic_add_int(&uvm_extrapages,
		    ((sizeof(int) * 2 + sizeof(struct vm_anon *)) *
		    totalslots) >> PAGE_SHIFT);
	}
	return NULL;
}
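
/*
 * A note on the three arrays allocated above (see also uvm_amap.h):
 * am_anon[] is indexed by slot number and holds the anon pointers
 * (NULL means the slot is empty); am_slots[] is a dense array of the
 * am_nused slot numbers currently in use, in no particular order; and
 * am_bckptr[slot] records where "slot" lives inside am_slots[], so a
 * slot can be removed in O(1) by moving the last am_slots[] entry into
 * its place (see amap_unadd() and amap_wiperange()).  For example,
 * after anons are added at slots 4 and then 2: am_nused == 2,
 * am_slots[] == { 4, 2 }, am_bckptr[4] == 0 and am_bckptr[2] == 1.
 */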

/*
 * amap_alloc: allocate an amap to manage "sz" bytes of anonymous VM
 *
 * => caller should ensure sz is a multiple of PAGE_SIZE
 * => reference count to new amap is set to one
 * => new amap is returned unlocked
 */

struct vm_amap *
amap_alloc(vaddr_t sz, vaddr_t padsz, int waitf)
{
	struct vm_amap *amap;
	int slots, padslots;
	UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);

	AMAP_B2SLOT(slots, sz);
	AMAP_B2SLOT(padslots, padsz);

	amap = amap_alloc1(slots, padslots, waitf);
	if (amap) {
		memset(amap->am_anon, 0,
		    amap->am_maxslot * sizeof(struct vm_anon *));
	}

	UVMHIST_LOG(maphist,"<- done, amap = %#jx, sz=%jd", (uintptr_t)amap,
	    sz, 0, 0);
	return(amap);
}

/*
 * amap_ctor: pool_cache constructor for new amaps
 *
 * => carefully synchronize with amap_swap_off()
 */
static int
amap_ctor(void *arg, void *obj, int flags)
{
	struct vm_amap *amap = obj;

	if ((flags & PR_NOWAIT) != 0) {
		amap->am_lock = rw_obj_tryalloc();
		if (amap->am_lock == NULL) {
			return ENOMEM;
		}
	} else {
		amap->am_lock = rw_obj_alloc();
	}
	amap->am_nused = 0;
	amap->am_flags = 0;

	mutex_enter(&amap_list_lock);
	LIST_INSERT_HEAD(&amap_list, amap, am_list);
	mutex_exit(&amap_list_lock);
	return 0;
}

/*
 * amap_dtor: pool_cache destructor for amaps
 *
 * => carefully synchronize with amap_swap_off()
 */
static void
amap_dtor(void *arg, void *obj)
{
	struct vm_amap *amap = obj;

	KASSERT(amap->am_nused == 0);

	mutex_enter(&amap_list_lock);
	LIST_REMOVE(amap, am_list);
	mutex_exit(&amap_list_lock);
	rw_obj_free(amap->am_lock);
}
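
/*
 * Note that the pool cache may hold on to constructed amaps: such an
 * amap remains on amap_list, with am_nused == 0 and its am_lock still
 * allocated, even though it is free.  This is why amap_swap_off() is
 * careful with amaps whose am_nused is zero, and why amap_alloc1()
 * may find the lock of a cached amap still shared (amap_copy() below
 * makes a new amap adopt its source's lock) and re-privatizes it while
 * holding amap_list_lock, which is what keeps am_lock stable for
 * amap_swap_off().
 */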

/*
 * uvm_amap_init: initialize the amap system.
 */
void
uvm_amap_init(void)
{

	mutex_init(&amap_list_lock, MUTEX_DEFAULT, IPL_NONE);

	pool_cache_bootstrap(&uvm_amap_cache, sizeof(struct vm_amap), 0, 0,
	    PR_LARGECACHE, "amappl", NULL, IPL_NONE, amap_ctor, amap_dtor,
	    NULL);
}

/*
 * amap_free: free an amap
 *
 * => the amap must be unlocked
 * => the amap should have a zero reference count and be empty
 */
void
amap_free(struct vm_amap *amap)
{
	int slots;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);

	KASSERT(amap->am_ref == 0 && amap->am_nused == 0);
	KASSERT((amap->am_flags & AMAP_SWAPOFF) == 0);
	slots = amap->am_maxslot;
	kmem_free(amap->am_slots, slots * sizeof(*amap->am_slots));
	kmem_free(amap->am_bckptr, slots * sizeof(*amap->am_bckptr));
	kmem_free(amap->am_anon, slots * sizeof(*amap->am_anon));
#ifdef UVM_AMAP_PPREF
	if (amap->am_ppref && amap->am_ppref != PPREF_NONE)
		kmem_free(amap->am_ppref, slots * sizeof(*amap->am_ppref));
#endif
	pool_cache_put(&uvm_amap_cache, amap);
	UVMHIST_LOG(maphist,"<- done, freed amap = %#jx", (uintptr_t)amap,
	    0, 0, 0);
}

/*
 * amap_extend: extend the size of an amap (if needed)
 *
 * => called from uvm_map when we want to extend an amap to cover
 *    a new mapping (rather than allocate a new one)
 * => amap should be unlocked (we will lock it)
 * => to safely extend an amap it should have a reference count of
 *    one (thus it can't be shared)
 */
int
amap_extend(struct vm_map_entry *entry, vsize_t addsize, int flags)
{
	struct vm_amap *amap = entry->aref.ar_amap;
	int slotoff = entry->aref.ar_pageoff;
	int slotmapped, slotadd, slotneed, slotadded, slotalloc;
	int slotadj, slotarea, slotendoff;
	int oldnslots;
#ifdef UVM_AMAP_PPREF
	int *newppref, *oldppref;
#endif
	int i, *newsl, *newbck, *oldsl, *oldbck;
	struct vm_anon **newover, **oldover;
	const km_flag_t kmflags =
	    (flags & AMAP_EXTEND_NOWAIT) ? KM_NOSLEEP : KM_SLEEP;

	UVMHIST_FUNC(__func__);
	UVMHIST_CALLARGS(maphist, "  (entry=%#jx, addsize=%#jx, flags=%#jx)",
	    (uintptr_t)entry, addsize, flags, 0);

	/*
	 * first, determine how many slots we need in the amap.  don't
	 * forget that ar_pageoff could be non-zero: this means that
	 * there are some unused slots before us in the amap.
	 */

	amap_lock(amap, RW_WRITER);
	KASSERT(amap_refs(amap) == 1); /* amap can't be shared */
	AMAP_B2SLOT(slotmapped, entry->end - entry->start); /* slots mapped */
	AMAP_B2SLOT(slotadd, addsize);			/* slots to add */
	if (flags & AMAP_EXTEND_FORWARDS) {
		slotneed = slotoff + slotmapped + slotadd;
		slotadj = 0;
		slotarea = 0;
	} else {
		slotneed = slotadd + slotmapped;
		slotadj = slotadd - slotoff;
		slotarea = amap->am_maxslot - slotmapped;
	}
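
	/*
	 * For a backwards extension: slotadj is how many of the slotadd new
	 * slots do not fit in the unused space below ar_pageoff, and slotarea
	 * is where the mapped slots would start if pushed to the very top of
	 * the allocation.  For example, slotoff = 5 and slotadd = 2 give
	 * slotadj = -3, so the new slots fit below the mapping and case 1b
	 * below simply moves ar_pageoff back to 3.
	 */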

	/*
	 * Because this amap only has 1 ref, we know that there is
	 * only one vm_map_entry pointing to it, and the one entry is
	 * using slots between slotoff and slotoff + slotmapped.  If
	 * we have been using ppref then we know that only slots in
	 * the one map entry's range can have anons, since ppref
	 * allowed us to free any anons outside that range as other map
	 * entries which used this amap were removed.  But without ppref,
	 * we couldn't know which slots were still needed by other map
	 * entries, so we couldn't free any anons as we removed map
	 * entries, and so any slot from 0 to am_nslot can have an
	 * anon.  But now that we know there is only one map entry
	 * left and we know its range, we can free up any anons
	 * outside that range.  This is necessary because the rest of
	 * this function assumes that there are no anons in the amap
	 * outside of the one map entry's range.
	 */

	slotendoff = slotoff + slotmapped;
	if (amap->am_ppref == PPREF_NONE) {
		amap_wiperange(amap, 0, slotoff);
		amap_wiperange(amap, slotendoff, amap->am_nslot - slotendoff);
	}
	for (i = 0; i < slotoff; i++) {
		KASSERT(amap->am_anon[i] == NULL);
	}
	for (i = slotendoff; i < amap->am_nslot - slotendoff; i++) {
		KASSERT(amap->am_anon[i] == NULL);
	}

	/*
	 * case 1: we already have enough slots in the map and thus
	 * only need to bump the reference counts on the slots we are
	 * adding.
	 */

	if (flags & AMAP_EXTEND_FORWARDS) {
		if (amap->am_nslot >= slotneed) {
#ifdef UVM_AMAP_PPREF
			if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
				amap_pp_adjref(amap, slotoff + slotmapped,
				    slotadd, 1);
			}
#endif
			amap_unlock(amap);
			UVMHIST_LOG(maphist,
			    "<- done (case 1f), amap = %#jx, sltneed=%jd",
			    (uintptr_t)amap, slotneed, 0, 0);
			return 0;
		}
	} else {
		if (slotadj <= 0) {
			slotoff -= slotadd;
			entry->aref.ar_pageoff = slotoff;
#ifdef UVM_AMAP_PPREF
			if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
				amap_pp_adjref(amap, slotoff, slotadd, 1);
			}
#endif
			amap_unlock(amap);
			UVMHIST_LOG(maphist,
			    "<- done (case 1b), amap = %#jx, sltneed=%jd",
			    (uintptr_t)amap, slotneed, 0, 0);
			return 0;
		}
	}

	/*
	 * case 2: we pre-allocated slots for use and we just need to
	 * bump nslot up to take account for these slots.
	 */

	if (amap->am_maxslot >= slotneed) {
		if (flags & AMAP_EXTEND_FORWARDS) {
#ifdef UVM_AMAP_PPREF
			if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
				if ((slotoff + slotmapped) < amap->am_nslot)
					amap_pp_adjref(amap,
					    slotoff + slotmapped,
					    (amap->am_nslot -
					    (slotoff + slotmapped)), 1);
				pp_setreflen(amap->am_ppref, amap->am_nslot, 1,
				    slotneed - amap->am_nslot);
			}
#endif
			amap->am_nslot = slotneed;
			amap_unlock(amap);

			/*
			 * no need to zero am_anon since that was done at
			 * alloc time and we never shrink an allocation.
			 */

			UVMHIST_LOG(maphist,"<- done (case 2f), amap = %#jx, "
			    "slotneed=%jd", (uintptr_t)amap, slotneed, 0, 0);
			return 0;
		} else {
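			/*
			 * Case 2b: the allocation is big enough, but there is
			 * not enough free space in front of ar_pageoff, so
			 * shift the mapped slots to the top of the allocation
			 * to open up room below.  For example, with
			 * am_maxslot = 8, slotmapped = 3 at slotoff = 1 and
			 * slotadd = 4: slotarea = 5, the three anons move to
			 * slots 5..7 and ar_pageoff becomes
			 * slotarea - slotadd = 1, so the entry now covers
			 * slots 1..7.
			 */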
#ifdef UVM_AMAP_PPREF
			if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
				/*
				 * Slide up the ref counts on the pages that
				 * are actually in use.
				 */
				memmove(amap->am_ppref + slotarea,
				    amap->am_ppref + slotoff,
				    slotmapped * sizeof(int));
				/*
				 * Mark the (adjusted) gap at the front as
				 * referenced/not referenced.
				 */
				pp_setreflen(amap->am_ppref,
				    0, 0, slotarea - slotadd);
				pp_setreflen(amap->am_ppref,
				    slotarea - slotadd, 1, slotadd);
			}
#endif

			/*
			 * Slide the anon pointers up and clear out
			 * the space we just made.
			 */
			memmove(amap->am_anon + slotarea,
			    amap->am_anon + slotoff,
			    slotmapped * sizeof(struct vm_anon*));
			memset(amap->am_anon + slotoff, 0,
			    (slotarea - slotoff) * sizeof(struct vm_anon *));

			/*
			 * Slide the backpointers up, but don't bother
			 * wiping out the old slots.
			 */
			memmove(amap->am_bckptr + slotarea,
			    amap->am_bckptr + slotoff,
			    slotmapped * sizeof(int));

			/*
			 * Adjust all the useful active slot numbers.
			 */
			for (i = 0; i < amap->am_nused; i++)
				amap->am_slots[i] += (slotarea - slotoff);

			/*
			 * We just filled all the empty space in the
			 * front of the amap by activating a few new
			 * slots.
			 */
			amap->am_nslot = amap->am_maxslot;
			entry->aref.ar_pageoff = slotarea - slotadd;
			amap_unlock(amap);

			UVMHIST_LOG(maphist,"<- done (case 2b), amap = %#jx, "
			    "slotneed=%jd", (uintptr_t)amap, slotneed, 0, 0);
			return 0;
		}
	}

	/*
	 * Case 3: we need to allocate a new amap and copy all the amap
	 * data over from old amap to the new one.  Drop the lock before
	 * performing allocation.
	 *
	 * Note: since allocations are likely big, we expect to reduce the
	 * memory fragmentation by allocating them in separate blocks.
	 */

	amap_unlock(amap);

	if (slotneed >= UVM_AMAP_LARGE) {
		return E2BIG;
	}

	slotalloc = amap_roundup_slots(slotneed);
#ifdef UVM_AMAP_PPREF
	newppref = NULL;
	if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
		/* Will be handled later if fails. */
		newppref = kmem_alloc(slotalloc * sizeof(*newppref), kmflags);
	}
#endif
	newsl = kmem_alloc(slotalloc * sizeof(*newsl), kmflags);
	newbck = kmem_alloc(slotalloc * sizeof(*newbck), kmflags);
	newover = kmem_alloc(slotalloc * sizeof(*newover), kmflags);
	if (newsl == NULL || newbck == NULL || newover == NULL) {
#ifdef UVM_AMAP_PPREF
		if (newppref != NULL) {
			kmem_free(newppref, slotalloc * sizeof(*newppref));
		}
#endif
		if (newsl != NULL) {
			kmem_free(newsl, slotalloc * sizeof(*newsl));
		}
		if (newbck != NULL) {
			kmem_free(newbck, slotalloc * sizeof(*newbck));
		}
		if (newover != NULL) {
			kmem_free(newover, slotalloc * sizeof(*newover));
		}
		return ENOMEM;
	}
	amap_lock(amap, RW_WRITER);
	KASSERT(amap->am_maxslot < slotneed);

	/*
	 * Copy everything over to new allocated areas.
	 */

	slotadded = slotalloc - amap->am_nslot;
	if (!(flags & AMAP_EXTEND_FORWARDS))
		slotarea = slotalloc - slotmapped;

	/* do am_slots */
	oldsl = amap->am_slots;
	if (flags & AMAP_EXTEND_FORWARDS)
		memcpy(newsl, oldsl, sizeof(int) * amap->am_nused);
	else
		for (i = 0; i < amap->am_nused; i++)
			newsl[i] = oldsl[i] + slotarea - slotoff;
	amap->am_slots = newsl;

	/* do am_anon */
	oldover = amap->am_anon;
	if (flags & AMAP_EXTEND_FORWARDS) {
		memcpy(newover, oldover,
		    sizeof(struct vm_anon *) * amap->am_nslot);
		memset(newover + amap->am_nslot, 0,
		    sizeof(struct vm_anon *) * slotadded);
	} else {
		memcpy(newover + slotarea, oldover + slotoff,
		    sizeof(struct vm_anon *) * slotmapped);
		memset(newover, 0,
		    sizeof(struct vm_anon *) * slotarea);
	}
	amap->am_anon = newover;

	/* do am_bckptr */
	oldbck = amap->am_bckptr;
	if (flags & AMAP_EXTEND_FORWARDS)
		memcpy(newbck, oldbck, sizeof(int) * amap->am_nslot);
	else
		memcpy(newbck + slotarea, oldbck + slotoff,
		    sizeof(int) * slotmapped);
	amap->am_bckptr = newbck;

#ifdef UVM_AMAP_PPREF
	/* do ppref */
	oldppref = amap->am_ppref;
	if (newppref) {
		if (flags & AMAP_EXTEND_FORWARDS) {
			memcpy(newppref, oldppref,
			    sizeof(int) * amap->am_nslot);
			memset(newppref + amap->am_nslot, 0,
			    sizeof(int) * slotadded);
		} else {
			memcpy(newppref + slotarea, oldppref + slotoff,
			    sizeof(int) * slotmapped);
		}
		amap->am_ppref = newppref;
		if ((flags & AMAP_EXTEND_FORWARDS) &&
		    (slotoff + slotmapped) < amap->am_nslot)
			amap_pp_adjref(amap, slotoff + slotmapped,
			    (amap->am_nslot - (slotoff + slotmapped)), 1);
		if (flags & AMAP_EXTEND_FORWARDS)
			pp_setreflen(newppref, amap->am_nslot, 1,
			    slotneed - amap->am_nslot);
		else {
			pp_setreflen(newppref, 0, 0,
			    slotalloc - slotneed);
			pp_setreflen(newppref, slotalloc - slotneed, 1,
			    slotneed - slotmapped);
		}
	} else {
		if (amap->am_ppref)
			amap->am_ppref = PPREF_NONE;
	}
#endif

	/* update master values */
	if (flags & AMAP_EXTEND_FORWARDS)
		amap->am_nslot = slotneed;
	else {
		entry->aref.ar_pageoff = slotarea - slotadd;
		amap->am_nslot = slotalloc;
	}
	oldnslots = amap->am_maxslot;
	amap->am_maxslot = slotalloc;
	amap_unlock(amap);

	kmem_free(oldsl, oldnslots * sizeof(*oldsl));
	kmem_free(oldbck, oldnslots * sizeof(*oldbck));
	kmem_free(oldover, oldnslots * sizeof(*oldover));
#ifdef UVM_AMAP_PPREF
	if (oldppref && oldppref != PPREF_NONE)
		kmem_free(oldppref, oldnslots * sizeof(*oldppref));
#endif
	UVMHIST_LOG(maphist,"<- done (case 3), amap = %#jx, slotneed=%jd",
	    (uintptr_t)amap, slotneed, 0, 0);
	return 0;
}

/*
 * amap_share_protect: change protection of anons in a shared amap
 *
 * for shared amaps, given the current data structure layout, it is
 * not possible for us to directly locate all maps referencing the
 * shared anon (to change the protection).  in order to protect data
 * in shared maps we use pmap_page_protect().  [this is useful for IPC
 * mechanisms like map entry passing that may want to write-protect
 * all mappings of a shared amap.]  we traverse am_anon or am_slots
 * depending on the current state of the amap.
 *
 * => entry's map and amap must be locked by the caller
 */
void
amap_share_protect(struct vm_map_entry *entry, vm_prot_t prot)
{
	struct vm_amap *amap = entry->aref.ar_amap;
	u_int slots, lcv, slot, stop;
	struct vm_anon *anon;

	KASSERT(rw_write_held(amap->am_lock));

	AMAP_B2SLOT(slots, (entry->end - entry->start));
	stop = entry->aref.ar_pageoff + slots;

	if (slots < amap->am_nused) {
		/*
		 * Cheaper to traverse am_anon.
		 */
		for (lcv = entry->aref.ar_pageoff ; lcv < stop ; lcv++) {
			anon = amap->am_anon[lcv];
			if (anon == NULL) {
				continue;
			}
			if (anon->an_page) {
				pmap_page_protect(anon->an_page, prot);
			}
		}
		return;
	}

	/*
	 * Cheaper to traverse am_slots.
	 */
	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
		slot = amap->am_slots[lcv];
		if (slot < entry->aref.ar_pageoff || slot >= stop) {
			continue;
		}
		anon = amap->am_anon[slot];
		if (anon->an_page) {
			pmap_page_protect(anon->an_page, prot);
		}
	}
}

/*
 * amap_wipeout: wipeout all anon's in an amap; then free the amap!
 *
 * => Called from amap_unref(), when reference count drops to zero.
 * => amap must be locked.
 */

void
amap_wipeout(struct vm_amap *amap)
{
	u_int lcv;

	UVMHIST_FUNC(__func__);
	UVMHIST_CALLARGS(maphist,"(amap=%#jx)", (uintptr_t)amap, 0,0,0);

	KASSERT(rw_write_held(amap->am_lock));
	KASSERT(amap->am_ref == 0);

	if (__predict_false(amap->am_flags & AMAP_SWAPOFF)) {
		/*
		 * Note: amap_swap_off() will call us again.
		 */
		amap_unlock(amap);
		return;
	}

	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
		struct vm_anon *anon;
		u_int slot;

		slot = amap->am_slots[lcv];
		anon = amap->am_anon[slot];
		KASSERT(anon != NULL && anon->an_ref != 0);

		KASSERT(anon->an_lock == amap->am_lock);
		UVMHIST_LOG(maphist,"  processing anon %#jx, ref=%jd",
		    (uintptr_t)anon, anon->an_ref, 0, 0);

		/*
		 * Drop the reference.
		 */

		if (__predict_true(--anon->an_ref == 0)) {
			uvm_anfree(anon);
		}
		if (__predict_false((lcv & 31) == 31)) {
			preempt_point();
		}
	}

	/*
	 * Finally, destroy the amap.
	 */

	amap->am_nused = 0;
	amap_unlock(amap);
	amap_free(amap);
	UVMHIST_LOG(maphist,"<- done!", 0,0,0,0);
}
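
/*
 * The AMAP_SWAPOFF test in amap_wipeout() pairs with amap_swap_off()
 * below: while amap_swap_off() is paging an anon back in, the amap lock
 * is dropped inside uvm_anon_pagein(), so the last reference can go away
 * under it.  In that case amap_wipeout() must not tear the amap down;
 * it just returns, and amap_swap_off() performs the wipeout itself once
 * it re-takes the lock and sees amap_refs() == 0.
 */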

/*
 * amap_copy: ensure that a map entry's "needs_copy" flag is false
 *	by copying the amap if necessary.
 *
 * => an entry with a null amap pointer will get a new (blank) one.
 * => the map that the map entry belongs to must be locked by caller.
 * => the amap currently attached to "entry" (if any) must be unlocked.
 * => if canchunk is true, then we may clip the entry into a chunk
 * => "startva" and "endva" are used only if canchunk is true.  they are
 *    used to limit chunking (e.g. if you have a large space that you
 *    know you are going to need to allocate amaps for, there is no point
 *    in allowing that to be chunked)
 */

void
amap_copy(struct vm_map *map, struct vm_map_entry *entry, int flags,
    vaddr_t startva, vaddr_t endva)
{
	const int waitf = (flags & AMAP_COPY_NOWAIT) ? UVM_FLAG_NOWAIT : 0;
	struct vm_amap *amap, *srcamap;
	u_int slots, lcv;
	krwlock_t *oldlock;
	vsize_t len;

	UVMHIST_FUNC(__func__);
	UVMHIST_CALLARGS(maphist, "  (map=%#jx, entry=%#jx, flags=%#jx)",
	    (uintptr_t)map, (uintptr_t)entry, flags, -2);

	KASSERT(map != kernel_map);	/* we use nointr pool */

	srcamap = entry->aref.ar_amap;
	len = entry->end - entry->start;

	/*
	 * Is there an amap to copy?  If not, create one.
	 */

	if (srcamap == NULL) {
		const bool canchunk = (flags & AMAP_COPY_NOCHUNK) == 0;

		/*
		 * Check to see if we have a large amap that we can
		 * chunk.  We align startva/endva to chunk-sized
		 * boundaries and then clip to them.
		 */

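		/*
		 * For example, if UVM_AMAP_CHUNK is 16 slots and PAGE_SIZE is
		 * 4096, chunksize below is 64KB: startva is rounded down and
		 * endva is rounded up to 64KB boundaries before clipping, so
		 * a large sparse mapping can get amaps a chunk at a time
		 * instead of one huge amap up front.
		 */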
		if (canchunk && atop(len) >= UVM_AMAP_LARGE) {
			vsize_t chunksize;

			/* Convert slots to bytes. */
			chunksize = UVM_AMAP_CHUNK << PAGE_SHIFT;
			startva = (startva / chunksize) * chunksize;
			endva = roundup(endva, chunksize);
			UVMHIST_LOG(maphist,
			    "  chunk amap ==> clip %#jx->%#jx to %#jx->%#jx",
			    entry->start, entry->end, startva, endva);
			UVM_MAP_CLIP_START(map, entry, startva);

			/* Watch out for endva wrap-around! */
			if (endva >= startva) {
				UVM_MAP_CLIP_END(map, entry, endva);
			}
		}

		if ((flags & AMAP_COPY_NOMERGE) == 0 &&
		    uvm_mapent_trymerge(map, entry, UVM_MERGE_COPYING)) {
			return;
		}

		UVMHIST_LOG(maphist, "<- done [creating new amap %#jx->%#jx]",
		    entry->start, entry->end, 0, 0);

		/*
		 * Allocate an initialised amap and install it.
		 * Note: we must update the length after clipping.
		 */
		len = entry->end - entry->start;
		entry->aref.ar_pageoff = 0;
		entry->aref.ar_amap = amap_alloc(len, 0, waitf);
		if (entry->aref.ar_amap != NULL) {
			entry->etype &= ~UVM_ET_NEEDSCOPY;
		}
		return;
	}

	/*
	 * First check and see if we are the only map entry referencing
	 * the amap we currently have.  If so, then just take it over instead
	 * of copying it.  Note that we are reading am_ref without lock held
	 * as the value can only be one if we have the only reference
	 * to the amap (via our locked map).  If the value is greater than
	 * one, then allocate amap and re-check the value.
	 */

	if (srcamap->am_ref == 1) {
		entry->etype &= ~UVM_ET_NEEDSCOPY;
		UVMHIST_LOG(maphist, "<- done [ref cnt = 1, took it over]",
		    0, 0, 0, 0);
		return;
	}

	UVMHIST_LOG(maphist,"  amap=%#jx, ref=%jd, must copy it",
	    (uintptr_t)srcamap, srcamap->am_ref, 0, 0);

	/*
	 * Allocate a new amap (note: not initialised, etc).
	 */

	AMAP_B2SLOT(slots, len);
	amap = amap_alloc1(slots, 0, waitf);
	if (amap == NULL) {
		UVMHIST_LOG(maphist, "  amap_alloc1 failed", 0,0,0,0);
		return;
	}

	/*
	 * Make the new amap share the source amap's lock, and then lock
	 * both.  We must do this before we set am_nused != 0, otherwise
	 * amap_swap_off() can become interested in the amap.
	 */

	oldlock = amap->am_lock;
	mutex_enter(&amap_list_lock);
	amap->am_lock = srcamap->am_lock;
	mutex_exit(&amap_list_lock);
	rw_obj_hold(amap->am_lock);
	rw_obj_free(oldlock);

	amap_lock(srcamap, RW_WRITER);

	/*
	 * Re-check the reference count with the lock held.  If it has
	 * dropped to one - we can take over the existing amap.
	 */

	if (srcamap->am_ref == 1) {
		/* Just take over the existing amap. */
		entry->etype &= ~UVM_ET_NEEDSCOPY;
		amap_unlock(srcamap);
		/* Destroy the new (unused) amap. */
		amap->am_ref--;
		amap_free(amap);
		return;
	}

	/*
	 * Copy the slots.  Zero the padded part.
	 */

	UVMHIST_LOG(maphist, "  copying amap now",0, 0, 0, 0);
	for (lcv = 0 ; lcv < slots; lcv++) {
		amap->am_anon[lcv] =
		    srcamap->am_anon[entry->aref.ar_pageoff + lcv];
		if (amap->am_anon[lcv] == NULL)
			continue;
		KASSERT(amap->am_anon[lcv]->an_lock == srcamap->am_lock);
		KASSERT(amap->am_anon[lcv]->an_ref > 0);
		KASSERT(amap->am_nused < amap->am_maxslot);
		amap->am_anon[lcv]->an_ref++;
		amap->am_bckptr[lcv] = amap->am_nused;
		amap->am_slots[amap->am_nused] = lcv;
		amap->am_nused++;
	}
	memset(&amap->am_anon[lcv], 0,
	    (amap->am_maxslot - lcv) * sizeof(struct vm_anon *));

	/*
	 * Drop our reference to the old amap (srcamap) and unlock.
	 * Since the reference count on srcamap is greater than one,
	 * (we checked above), it cannot drop to zero while it is locked.
	 */

	srcamap->am_ref--;
	KASSERT(srcamap->am_ref > 0);

	if (srcamap->am_ref == 1 && (srcamap->am_flags & AMAP_SHARED) != 0) {
		srcamap->am_flags &= ~AMAP_SHARED;
	}
#ifdef UVM_AMAP_PPREF
	if (srcamap->am_ppref && srcamap->am_ppref != PPREF_NONE) {
		amap_pp_adjref(srcamap, entry->aref.ar_pageoff,
		    len >> PAGE_SHIFT, -1);
	}
#endif

	amap_unlock(srcamap);

	/*
	 * Install new amap.
	 */

	entry->aref.ar_pageoff = 0;
	entry->aref.ar_amap = amap;
	entry->etype &= ~UVM_ET_NEEDSCOPY;
	UVMHIST_LOG(maphist, "<- done",0, 0, 0, 0);
}

/*
 * amap_cow_now: resolve all copy-on-write faults in an amap now for fork(2)
 *
 *	called during fork(2) when the parent process has a wired map
 *	entry.   in that case we want to avoid write-protecting pages
 *	in the parent's map (e.g. like what you'd do for a COW page)
 *	so we resolve the COW here.
 *
 * => assume parent's entry was wired, thus all pages are resident.
 * => assume pages that are loaned out (loan_count) are already mapped
 *    read-only in all maps, and thus no need for us to worry about them
 * => assume both parent and child vm_map's are locked
 * => caller passes child's map/entry in to us
 * => if we run out of memory we will unlock the amap and sleep _with_ the
 *    parent and child vm_map's locked(!).    we have to do this since
 *    we are in the middle of a fork(2) and we can't let the parent
 *    map change until we are done copying all the map entries.
 * => XXXCDC: out of memory should cause fork to fail, but there is
 *    currently no easy way to do this (needs fix)
 */

void
amap_cow_now(struct vm_map *map, struct vm_map_entry *entry)
{
	struct vm_amap *amap = entry->aref.ar_amap;
	struct vm_anon *anon, *nanon;
	struct vm_page *pg, *npg;
	u_int lcv, slot;

	/*
	 * note that if we unlock the amap then we must ReStart the "lcv" for
	 * loop because some other process could reorder the anon's in the
	 * am_anon[] array on us while the lock is dropped.
	 */

ReStart:
	amap_lock(amap, RW_WRITER);
	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
		slot = amap->am_slots[lcv];
		anon = amap->am_anon[slot];
		KASSERT(anon->an_lock == amap->am_lock);

		/*
		 * If anon has only one reference - we must have already
		 * copied it.  This can happen if we needed to sleep waiting
		 * for memory in a previous run through this loop.  The new
		 * page might even have been paged out, since it is not wired.
		 */

		if (anon->an_ref == 1) {
			KASSERT(anon->an_page != NULL || anon->an_swslot != 0);
			continue;
		}

		/*
		 * The old page must be resident since the parent is wired.
		 */

		pg = anon->an_page;
		KASSERT(pg != NULL);
		KASSERT(pg->wire_count > 0);

		/*
		 * If the page is loaned then it must already be mapped
		 * read-only and we don't need to copy it.
		 */

		if (pg->loan_count != 0) {
			continue;
		}
		KASSERT(pg->uanon == anon && pg->uobject == NULL);

		/*
		 * If the page is busy, then we have to unlock, wait for
		 * it and then restart.
		 */

		if (pg->flags & PG_BUSY) {
			uvm_pagewait(pg, amap->am_lock, "cownow");
			goto ReStart;
		}

		/*
		 * Perform a copy-on-write.
		 * First - get a new anon and a page.
		 */

		nanon = uvm_analloc();
		if (nanon) {
			nanon->an_lock = amap->am_lock;
			npg = uvm_pagealloc(NULL, 0, nanon, 0);
		} else {
			npg = NULL;
		}
		if (nanon == NULL || npg == NULL) {
			amap_unlock(amap);
			if (nanon) {
				nanon->an_lock = NULL;
				nanon->an_ref--;
				KASSERT(nanon->an_ref == 0);
				uvm_anfree(nanon);
			}
			uvm_wait("cownowpage");
			goto ReStart;
		}

		/*
		 * Copy the data and replace anon with the new one.
		 * Also, setup its lock (share the amap's lock).
		 */

		uvm_pagecopy(pg, npg);
		anon->an_ref--;
		KASSERT(anon->an_ref > 0);
		amap->am_anon[slot] = nanon;

		/*
		 * Drop PG_BUSY on new page.  Since its owner was write
		 * locked all this time - it cannot be PG_RELEASED or
		 * waited on.
		 */
		uvm_pagelock(npg);
		uvm_pageactivate(npg);
		uvm_pageunlock(npg);
		npg->flags &= ~(PG_BUSY|PG_FAKE);
		UVM_PAGE_OWN(npg, NULL);
	}
	amap_unlock(amap);
}

/*
 * amap_splitref: split a single reference into two separate references
 *
 * => called from uvm_map's clip routines
 * => origref's map should be locked
 * => origref->ar_amap should be unlocked (we will lock)
 */
void
amap_splitref(struct vm_aref *origref, struct vm_aref *splitref, vaddr_t offset)
{
	struct vm_amap *amap = origref->ar_amap;
	u_int leftslots;

	KASSERT(splitref->ar_amap == origref->ar_amap);
	AMAP_B2SLOT(leftslots, offset);
	KASSERT(leftslots != 0);

	amap_lock(amap, RW_WRITER);
	KASSERT(amap->am_nslot - origref->ar_pageoff - leftslots > 0);

#ifdef UVM_AMAP_PPREF
	/* Establish ppref before we add a duplicate reference to the amap. */
	if (amap->am_ppref == NULL) {
		amap_pp_establish(amap, origref->ar_pageoff);
	}
#endif
	/* Note: not a share reference. */
	amap->am_ref++;
	splitref->ar_pageoff = origref->ar_pageoff + leftslots;
	amap_unlock(amap);
}

#ifdef UVM_AMAP_PPREF

/*
 * amap_pp_establish: add a ppref array to an amap, if possible.
 *
 * => amap should be locked by caller.
 */
void
amap_pp_establish(struct vm_amap *amap, vaddr_t offset)
{
	const size_t sz = amap->am_maxslot * sizeof(*amap->am_ppref);

	KASSERT(rw_write_held(amap->am_lock));

	amap->am_ppref = kmem_zalloc(sz, KM_NOSLEEP);
	if (amap->am_ppref == NULL) {
		/* Failure - just do not use ppref. */
		amap->am_ppref = PPREF_NONE;
		return;
	}
	pp_setreflen(amap->am_ppref, 0, 0, offset);
	pp_setreflen(amap->am_ppref, offset, amap->am_ref,
	    amap->am_nslot - offset);
}
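
/*
 * Example of the array at work: an amap with am_nslot = 8, ar_pageoff = 0
 * and am_ref = 1 starts out as a single chunk, ppref[0..1] = { -2, 8 }.
 * If a second reference covering slots 2..5 is then added (am_ref is now
 * 2), amap_pp_adjref(amap, 2, 4, 1) fragments this into three chunks:
 * { -2, 2 } for slots 0-1 (ref 1), { -3, 4 } for slots 2-5 (ref 2) and
 * { -2, 2 } for slots 6-7 (ref 1).
 */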

/*
 * amap_pp_adjref: adjust reference count to a part of an amap using the
 * per-page reference count array.
 *
 * => caller must check that ppref != PPREF_NONE before calling.
 * => map and amap must be locked.
 */
void
amap_pp_adjref(struct vm_amap *amap, int curslot, vsize_t slotlen, int adjval)
{
	int stopslot, *ppref, lcv, prevlcv;
	int ref, len, prevref, prevlen;

	KASSERT(rw_write_held(amap->am_lock));

	stopslot = curslot + slotlen;
	ppref = amap->am_ppref;
	prevlcv = 0;

	/*
	 * Advance to the correct place in the array, fragment if needed.
	 */

	for (lcv = 0 ; lcv < curslot ; lcv += len) {
		pp_getreflen(ppref, lcv, &ref, &len);
		if (lcv + len > curslot) {     /* goes past start? */
			pp_setreflen(ppref, lcv, ref, curslot - lcv);
			pp_setreflen(ppref, curslot, ref, len - (curslot -lcv));
			len = curslot - lcv;   /* new length of entry @ lcv */
		}
		prevlcv = lcv;
	}
	if (lcv == 0) {
		/*
		 * Ensure that the "prevref == ref" test below always
		 * fails, since we are starting from the beginning of
		 * the ppref array; that is, there is no previous chunk.
		 */
		prevref = -1;
		prevlen = 0;
	} else {
		pp_getreflen(ppref, prevlcv, &prevref, &prevlen);
	}

	/*
	 * Now adjust reference counts in range.  Merge the first
	 * changed entry with the last unchanged entry if possible.
	 */
	KASSERT(lcv == curslot);
	for (/* lcv already set */; lcv < stopslot ; lcv += len) {
		pp_getreflen(ppref, lcv, &ref, &len);
		if (lcv + len > stopslot) {     /* goes past end? */
			pp_setreflen(ppref, lcv, ref, stopslot - lcv);
			pp_setreflen(ppref, stopslot, ref,
			    len - (stopslot - lcv));
			len = stopslot - lcv;
		}
		ref += adjval;
		KASSERT(ref >= 0);
		KASSERT(ref <= amap->am_ref);
		if (lcv == prevlcv + prevlen && ref == prevref) {
			pp_setreflen(ppref, prevlcv, ref, prevlen + len);
		} else {
			pp_setreflen(ppref, lcv, ref, len);
		}
		if (ref == 0) {
			amap_wiperange(amap, lcv, len);
		}
	}
}

/*
 * amap_wiperange: wipe out a range of an amap.
 * Note: different from amap_wipeout because the amap is kept intact.
 *
 * => Both map and amap must be locked by caller.
 */
void
amap_wiperange(struct vm_amap *amap, int slotoff, int slots)
{
	u_int lcv, stop, slotend;
	bool byanon;

	KASSERT(rw_write_held(amap->am_lock));

	/*
	 * We can either traverse the amap by am_anon or by am_slots.
	 * Determine which way is less expensive.
	 */

	if (slots < amap->am_nused) {
		byanon = true;
		lcv = slotoff;
		stop = slotoff + slots;
		slotend = 0;
	} else {
		byanon = false;
		lcv = 0;
		stop = amap->am_nused;
		slotend = slotoff + slots;
	}

	while (lcv < stop) {
		struct vm_anon *anon;
		u_int curslot, ptr, last;

		if (byanon) {
			curslot = lcv++;	/* lcv advances here */
			if (amap->am_anon[curslot] == NULL)
				continue;
		} else {
			curslot = amap->am_slots[lcv];
			if (curslot < slotoff || curslot >= slotend) {
				lcv++;		/* lcv advances here */
				continue;
			}
			stop--;	/* drop stop, since anon will be removed */
		}
		anon = amap->am_anon[curslot];
		KASSERT(anon->an_lock == amap->am_lock);

		/*
		 * Remove anon from the amap.
		 */

		amap->am_anon[curslot] = NULL;
		ptr = amap->am_bckptr[curslot];
		last = amap->am_nused - 1;
		if (ptr != last) {
			amap->am_slots[ptr] = amap->am_slots[last];
			amap->am_bckptr[amap->am_slots[ptr]] = ptr;
		}
		amap->am_nused--;

		/*
		 * Drop its reference count.
		 */

		KASSERT(anon->an_lock == amap->am_lock);
		if (--anon->an_ref == 0) {
			uvm_anfree(anon);
		}
	}
}

#endif

#if defined(VMSWAP)

/*
 * amap_swap_off: pagein anonymous pages in amaps and drop swap slots.
 *
 * => called with swap_syscall_lock held.
 * => note that we don't always traverse all anons.
 *    eg. amaps being wiped out, released anons.
 * => return true if failed.
 */

bool
amap_swap_off(int startslot, int endslot)
{
	struct vm_amap *am;
	struct vm_amap *am_next;
	struct vm_amap marker_prev;
	struct vm_amap marker_next;
	bool rv = false;

#if defined(DIAGNOSTIC)
	memset(&marker_prev, 0, sizeof(marker_prev));
	memset(&marker_next, 0, sizeof(marker_next));
#endif /* defined(DIAGNOSTIC) */
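
	/*
	 * marker_prev and marker_next are dummy amaps that bracket the amap
	 * currently being processed.  The list lock is dropped below (for
	 * kpause() and while uvm_anon_pagein() runs), during which the
	 * current amap may be wiped out and returned to the pool and the
	 * list may otherwise change; the markers stay put and give us a
	 * safe point from which to pick the next amap to visit.
	 */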

	mutex_enter(&amap_list_lock);
	for (am = LIST_FIRST(&amap_list); am != NULL && !rv; am = am_next) {
		int i;

		LIST_INSERT_BEFORE(am, &marker_prev, am_list);
		LIST_INSERT_AFTER(am, &marker_next, am_list);

		/* amap_list_lock prevents the lock pointer from changing. */
		if (!amap_lock_try(am, RW_WRITER)) {
			(void)kpause("amapswpo", false, 1, &amap_list_lock);
			am_next = LIST_NEXT(&marker_prev, am_list);
			if (am_next == &marker_next) {
				am_next = LIST_NEXT(am_next, am_list);
			} else {
				KASSERT(LIST_NEXT(am_next, am_list) ==
				    &marker_next);
			}
			LIST_REMOVE(&marker_prev, am_list);
			LIST_REMOVE(&marker_next, am_list);
			continue;
		}

		mutex_exit(&amap_list_lock);

		/* If am_nused == 0, the amap could be free - careful. */
		for (i = 0; i < am->am_nused; i++) {
			int slot;
			int swslot;
			struct vm_anon *anon;

			slot = am->am_slots[i];
			anon = am->am_anon[slot];
			KASSERT(anon->an_lock == am->am_lock);

			swslot = anon->an_swslot;
			if (swslot < startslot || endslot <= swslot) {
				continue;
			}

			am->am_flags |= AMAP_SWAPOFF;

			rv = uvm_anon_pagein(am, anon);
			amap_lock(am, RW_WRITER);

			am->am_flags &= ~AMAP_SWAPOFF;
			if (amap_refs(am) == 0) {
				amap_wipeout(am);
				am = NULL;
				break;
			}
			if (rv) {
				break;
			}
			i = 0;
		}

		if (am) {
			amap_unlock(am);
		}

		mutex_enter(&amap_list_lock);
		KASSERT(LIST_NEXT(&marker_prev, am_list) == &marker_next ||
		    LIST_NEXT(LIST_NEXT(&marker_prev, am_list), am_list) ==
		    &marker_next);
		am_next = LIST_NEXT(&marker_next, am_list);
		LIST_REMOVE(&marker_prev, am_list);
		LIST_REMOVE(&marker_next, am_list);
	}
	mutex_exit(&amap_list_lock);

	return rv;
}

#endif /* defined(VMSWAP) */

/*
 * amap_lookup: look up a page in an amap.
 *
 * => amap should be locked by caller.
 */
struct vm_anon *
amap_lookup(struct vm_aref *aref, vaddr_t offset)
{
	struct vm_amap *amap = aref->ar_amap;
	struct vm_anon *an;
	u_int slot;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
	KASSERT(rw_lock_held(amap->am_lock));

	AMAP_B2SLOT(slot, offset);
	slot += aref->ar_pageoff;
	an = amap->am_anon[slot];

	UVMHIST_LOG(maphist,
	    "<- done (amap=%#jx, offset=%#jx, result=%#jx)",
	    (uintptr_t)amap, offset, (uintptr_t)an, 0);

	KASSERT(slot < amap->am_nslot);
	KASSERT(an == NULL || an->an_ref != 0);
	KASSERT(an == NULL || an->an_lock == amap->am_lock);
	return an;
}

/*
 * amap_lookups: look up a range of pages in an amap.
 *
 * => amap should be locked by caller.
 */
void
amap_lookups(struct vm_aref *aref, vaddr_t offset, struct vm_anon **anons,
    int npages)
{
	struct vm_amap *amap = aref->ar_amap;
	u_int slot;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
	KASSERT(rw_lock_held(amap->am_lock));

	AMAP_B2SLOT(slot, offset);
	slot += aref->ar_pageoff;

	UVMHIST_LOG(maphist, "  slot=%u, npages=%d, nslot=%d",
	    slot, npages, amap->am_nslot, 0);

	KASSERT((slot + (npages - 1)) < amap->am_nslot);
	memcpy(anons, &amap->am_anon[slot], npages * sizeof(struct vm_anon *));

#if defined(DIAGNOSTIC)
	for (int i = 0; i < npages; i++) {
		struct vm_anon * const an = anons[i];
		if (an == NULL) {
			continue;
		}
		KASSERT(an->an_ref != 0);
		KASSERT(an->an_lock == amap->am_lock);
	}
#endif
	UVMHIST_LOG(maphist, "<- done", 0, 0, 0, 0);
}
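
/*
 * In the lookup routines above and in amap_add()/amap_unadd() below,
 * AMAP_B2SLOT() converts the page-aligned byte offset within the aref
 * to a slot (page) count and ar_pageoff skews it into the amap: an aref
 * with ar_pageoff = 3 queried at offset 2 * PAGE_SIZE refers to slot 5.
 */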

/*
 * amap_add: add (or replace) a page to an amap.
 *
 * => amap should be locked by caller.
 * => anon must have the lock associated with this amap.
 */
void
amap_add(struct vm_aref *aref, vaddr_t offset, struct vm_anon *anon,
    bool replace)
{
	struct vm_amap *amap = aref->ar_amap;
	u_int slot;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
	KASSERT(rw_write_held(amap->am_lock));
	KASSERT(anon->an_lock == amap->am_lock);

	AMAP_B2SLOT(slot, offset);
	slot += aref->ar_pageoff;
	KASSERT(slot < amap->am_nslot);

	if (replace) {
		struct vm_anon *oanon = amap->am_anon[slot];

		KASSERT(oanon != NULL);
		if (oanon->an_page && (amap->am_flags & AMAP_SHARED) != 0) {
			pmap_page_protect(oanon->an_page, VM_PROT_NONE);
			/*
			 * XXX: suppose page is supposed to be wired somewhere?
			 */
		}
	} else {
		KASSERT(amap->am_anon[slot] == NULL);
		KASSERT(amap->am_nused < amap->am_maxslot);
		amap->am_bckptr[slot] = amap->am_nused;
		amap->am_slots[amap->am_nused] = slot;
		amap->am_nused++;
	}
	amap->am_anon[slot] = anon;
	UVMHIST_LOG(maphist,
	    "<- done (amap=%#jx, offset=%#x, anon=%#jx, rep=%d)",
	    (uintptr_t)amap, offset, (uintptr_t)anon, replace);
}

/*
 * amap_unadd: remove a page from an amap.
 *
 * => amap should be locked by caller.
 */
void
amap_unadd(struct vm_aref *aref, vaddr_t offset)
{
	struct vm_amap *amap = aref->ar_amap;
	u_int slot, ptr, last;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
	KASSERT(rw_write_held(amap->am_lock));

	AMAP_B2SLOT(slot, offset);
	slot += aref->ar_pageoff;
	KASSERT(slot < amap->am_nslot);
	KASSERT(amap->am_anon[slot] != NULL);
	KASSERT(amap->am_anon[slot]->an_lock == amap->am_lock);

	amap->am_anon[slot] = NULL;
	ptr = amap->am_bckptr[slot];

	last = amap->am_nused - 1;
	if (ptr != last) {
		/* Move the last entry to keep the slots contiguous. */
		amap->am_slots[ptr] = amap->am_slots[last];
		amap->am_bckptr[amap->am_slots[ptr]] = ptr;
	}
	amap->am_nused--;
	UVMHIST_LOG(maphist, "<- done (amap=%#jx, slot=%#jx)",
	    (uintptr_t)amap, slot,0, 0);
}

/*
 * amap_adjref_anons: adjust the reference count(s) on amap and its anons.
 */
static void
amap_adjref_anons(struct vm_amap *amap, vaddr_t offset, vsize_t len,
    int refv, bool all)
{

#ifdef UVM_AMAP_PPREF
	KASSERT(rw_write_held(amap->am_lock));

	/*
	 * We must establish the ppref array before changing am_ref
	 * so that the ppref values match the current amap refcount.
	 */

	if (amap->am_ppref == NULL) {
		amap_pp_establish(amap, offset);
	}
#endif

	amap->am_ref += refv;

#ifdef UVM_AMAP_PPREF
	if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
		amap_pp_adjref(amap, offset, len, refv);
	}
#endif
	amap_unlock(amap);
}

/*
 * amap_ref: gain a reference to an amap.
 *
 * => amap must not be locked (we will lock).
 * => "offset" and "len" are in units of pages.
 * => Called at fork time to gain the child's reference.
 */
void
amap_ref(struct vm_amap *amap, vaddr_t offset, vsize_t len, int flags)
{
	UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);

	amap_lock(amap, RW_WRITER);
	if (flags & AMAP_SHARED) {
		amap->am_flags |= AMAP_SHARED;
	}
	amap_adjref_anons(amap, offset, len, 1, (flags & AMAP_REFALL) != 0);

	UVMHIST_LOG(maphist,"<- done!  amap=%#jx", (uintptr_t)amap, 0, 0, 0);
}

/*
 * amap_unref: remove a reference to an amap.
 *
 * => All pmap-level references to this amap must be already removed.
 * => Called from uvm_unmap_detach(); entry is already removed from the map.
 * => We will lock amap, so it must be unlocked.
 */
void
amap_unref(struct vm_amap *amap, vaddr_t offset, vsize_t len, bool all)
{
	UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);

	amap_lock(amap, RW_WRITER);

	UVMHIST_LOG(maphist,"  amap=%#jx  refs=%d, nused=%d",
	    (uintptr_t)amap, amap->am_ref, amap->am_nused, 0);
	KASSERT(amap->am_ref > 0);

	if (amap->am_ref == 1) {

		/*
		 * If the last reference - wipeout and destroy the amap.
		 */
		amap->am_ref--;
		amap_wipeout(amap);
		UVMHIST_LOG(maphist,"<- done (was last ref)!", 0, 0, 0, 0);
		return;
	}

	/*
	 * Otherwise, drop the reference count(s) on anons.
	 */

	if (amap->am_ref == 2 && (amap->am_flags & AMAP_SHARED) != 0) {
		amap->am_flags &= ~AMAP_SHARED;
	}
	amap_adjref_anons(amap, offset, len, -1, all);

	UVMHIST_LOG(maphist,"<- done!", 0, 0, 0, 0);
}