/*	$NetBSD: uvm_amap.c,v 1.55 2004/05/12 20:09:50 yamt Exp $	*/

/*
 *
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by Charles D. Cranor and
 *      Washington University.
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * uvm_amap.c: amap operations
 */

/*
 * this file contains functions that perform operations on amaps.  see
 * uvm_amap.h for a brief explanation of the role of amaps in uvm.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_amap.c,v 1.55 2004/05/12 20:09:50 yamt Exp $");

#undef UVM_AMAP_INLINE		/* enable/disable amap inlines */

#include "opt_uvmhist.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/pool.h>

#define UVM_AMAP_C		/* ensure disabled inlines are in */
#include <uvm/uvm.h>
#include <uvm/uvm_swap.h>

/*
 * pool for allocation of vm_amap structures.  note that the pool has
 * its own simplelock for its protection.  also note that in order to
 * avoid an endless loop, the amap pool's allocator cannot allocate
 * memory from an amap (it currently goes through the kernel uobj, so
 * we are ok).
 */
POOL_INIT(uvm_amap_pool, sizeof(struct vm_amap), 0, 0, 0, "amappl",
    &pool_allocator_nointr);

MALLOC_DEFINE(M_UVMAMAP, "UVM amap", "UVM amap and related structures");

/*
 * local functions
 */

static struct vm_amap *amap_alloc1(int, int, int);

#ifdef UVM_AMAP_PPREF
/*
 * what is ppref?  ppref is an _optional_ amap feature which is used
 * to keep track of reference counts on a per-page basis.  it is enabled
 * when UVM_AMAP_PPREF is defined.
 *
 * when enabled, an array of ints is allocated for the pprefs.  this
 * array is allocated only when a partial reference is added to the
 * map (either by unmapping part of the amap, or gaining a reference
 * to only a part of an amap).  if the malloc of the array fails
 * (M_NOWAIT), then we set the array pointer to PPREF_NONE to indicate
 * that we tried to do ppref's but couldn't alloc the array so just
 * give up (after all, this is an optional feature!).
 *
 * the array is divided into page sized "chunks."  for chunks of length 1,
 * the chunk reference count plus one is stored in that chunk's slot.
 * for chunks of length > 1 the first slot contains (the reference count
 * plus one) * -1.  [the negative value indicates that the length is
 * greater than one.]  the second slot of the chunk contains the length
 * of the chunk.  here is an example:
 *
 * actual REFS:  2  2  2  2  3  1  1  0  0  0  4  4  0  1  1  1
 *       ppref: -3  4  x  x  4 -2  2 -1  3  x -5  2  1 -2  3  x
 *              <----------><-><----><-------><----><-><------->
 * (x = don't care)
 *
 * this allows us to allow one int to contain the ref count for the whole
 * chunk.  note that the "plus one" part is needed because a reference
 * count of zero is neither positive nor negative (need a way to tell
 * if we've got one zero or a bunch of them).
 *
 * here are some in-line functions to help us.
 */

static __inline void pp_getreflen(int *, int, int *, int *);
static __inline void pp_setreflen(int *, int, int, int);

/*
 * pp_getreflen: get the reference and length for a specific offset
 *
 * => ppref's amap must be locked
 */
static __inline void
pp_getreflen(ppref, offset, refp, lenp)
	int *ppref, offset, *refp, *lenp;
{

	if (ppref[offset] > 0) {		/* chunk size must be 1 */
		*refp = ppref[offset] - 1;	/* don't forget to adjust */
		*lenp = 1;
	} else {
		*refp = (ppref[offset] * -1) - 1;
		*lenp = ppref[offset+1];
	}
}

/*
 * pp_setreflen: set the reference and length for a specific offset
 *
 * => ppref's amap must be locked
 */
static __inline void
pp_setreflen(ppref, offset, ref, len)
	int *ppref, offset, ref, len;
{
	if (len == 0)
		return;
	if (len == 1) {
		ppref[offset] = ref + 1;
	} else {
		ppref[offset] = (ref + 1) * -1;
		ppref[offset+1] = len;
	}
}
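
/*
 * Illustrative sketch (not compiled into the kernel): how the ppref
 * encoding described above is written and read back with
 * pp_setreflen()/pp_getreflen().  It rebuilds the example array from
 * the comment and walks it chunk by chunk.  The function name
 * "pp_example" exists only for this sketch.
 */
#if 0
static void
pp_example(void)
{
	int ppref[16], off, ref, len;

	pp_setreflen(ppref, 0, 2, 4);	/* slots 0-3:   ref 2  -> -3 4 */
	pp_setreflen(ppref, 4, 3, 1);	/* slot 4:      ref 3  ->  4   */
	pp_setreflen(ppref, 5, 1, 2);	/* slots 5-6:   ref 1  -> -2 2 */
	pp_setreflen(ppref, 7, 0, 3);	/* slots 7-9:   ref 0  -> -1 3 */
	pp_setreflen(ppref, 10, 4, 2);	/* slots 10-11: ref 4  -> -5 2 */
	pp_setreflen(ppref, 12, 0, 1);	/* slot 12:     ref 0  ->  1   */
	pp_setreflen(ppref, 13, 1, 3);	/* slots 13-15: ref 1  -> -2 3 */

	/* walk the array one chunk at a time */
	for (off = 0; off < 16; off += len) {
		pp_getreflen(ppref, off, &ref, &len);
		printf("slots %d-%d: ref %d\n", off, off + len - 1, ref);
	}
}
#endif
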
#endif

/*
 * amap_alloc1: internal function that allocates an amap, but does not
 *	init the overlay.
 *
 * => lock on returned amap is init'd
 */
static inline struct vm_amap *
amap_alloc1(slots, padslots, waitf)
	int slots, padslots, waitf;
{
	struct vm_amap *amap;
	int totalslots;

	amap = pool_get(&uvm_amap_pool, (waitf == M_WAITOK) ? PR_WAITOK : 0);
	if (amap == NULL)
		return(NULL);

	totalslots = malloc_roundup((slots + padslots) * sizeof(int)) /
	    sizeof(int);
	simple_lock_init(&amap->am_l);
	amap->am_ref = 1;
	amap->am_flags = 0;
#ifdef UVM_AMAP_PPREF
	amap->am_ppref = NULL;
#endif
	amap->am_maxslot = totalslots;
	amap->am_nslot = slots;
	amap->am_nused = 0;

	amap->am_slots = malloc(totalslots * sizeof(int), M_UVMAMAP,
	    waitf);
	if (amap->am_slots == NULL)
		goto fail1;

	amap->am_bckptr = malloc(totalslots * sizeof(int), M_UVMAMAP, waitf);
	if (amap->am_bckptr == NULL)
		goto fail2;

	amap->am_anon = malloc(totalslots * sizeof(struct vm_anon *),
	    M_UVMAMAP, waitf);
	if (amap->am_anon == NULL)
		goto fail3;

	return(amap);

fail3:
	free(amap->am_bckptr, M_UVMAMAP);
fail2:
	free(amap->am_slots, M_UVMAMAP);
fail1:
	pool_put(&uvm_amap_pool, amap);
	return (NULL);
}

/*
 * amap_alloc: allocate an amap to manage "sz" bytes of anonymous VM
 *
 * => caller should ensure sz is a multiple of PAGE_SIZE
 * => reference count to new amap is set to one
 * => new amap is returned unlocked
 */

struct vm_amap *
amap_alloc(sz, padsz, waitf)
	vaddr_t sz, padsz;
	int waitf;
{
	struct vm_amap *amap;
	int slots, padslots;
	UVMHIST_FUNC("amap_alloc"); UVMHIST_CALLED(maphist);

	AMAP_B2SLOT(slots, sz);
	AMAP_B2SLOT(padslots, padsz);

	amap = amap_alloc1(slots, padslots, waitf);
	if (amap)
		memset(amap->am_anon, 0,
		    amap->am_maxslot * sizeof(struct vm_anon *));

	UVMHIST_LOG(maphist,"<- done, amap = 0x%x, sz=%d", amap, sz, 0, 0);
	return(amap);
}
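
/*
 * Illustrative sketch (not compiled into the kernel): the sizing
 * arithmetic behind amap_alloc()/amap_alloc1().  The figures assume
 * PAGE_SIZE is 4096; the exact value of am_maxslot depends on how far
 * malloc_roundup() rounds the request up, so the slack shown here is
 * only an example.  "amap_sizing_example" is a name invented for this
 * sketch.
 */
#if 0
static void
amap_sizing_example(void)
{
	vaddr_t sz = 3 * PAGE_SIZE;	/* manage three pages of anon VM */
	int slots, totalslots;

	AMAP_B2SLOT(slots, sz);		/* slots == 3 */

	/*
	 * amap_alloc1() sizes the three arrays from the rounded-up
	 * malloc request, so am_maxslot can exceed am_nslot; that
	 * slack is what amap_extend() case 2 later activates without
	 * reallocating.
	 */
	totalslots = malloc_roundup(slots * sizeof(int)) / sizeof(int);
	KASSERT(totalslots >= slots);	/* am_maxslot >= am_nslot */
}
#endif
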
/*
 * amap_free: free an amap
 *
 * => the amap must be unlocked
 * => the amap should have a zero reference count and be empty
 */
void
amap_free(amap)
	struct vm_amap *amap;
{
	UVMHIST_FUNC("amap_free"); UVMHIST_CALLED(maphist);

	KASSERT(amap->am_ref == 0 && amap->am_nused == 0);
	LOCK_ASSERT(!simple_lock_held(&amap->am_l));
	free(amap->am_slots, M_UVMAMAP);
	free(amap->am_bckptr, M_UVMAMAP);
	free(amap->am_anon, M_UVMAMAP);
#ifdef UVM_AMAP_PPREF
	if (amap->am_ppref && amap->am_ppref != PPREF_NONE)
		free(amap->am_ppref, M_UVMAMAP);
#endif
	pool_put(&uvm_amap_pool, amap);
	UVMHIST_LOG(maphist,"<- done, freed amap = 0x%x", amap, 0, 0, 0);
}

/*
 * amap_extend: extend the size of an amap (if needed)
 *
 * => called from uvm_map when we want to extend an amap to cover
 *    a new mapping (rather than allocate a new one)
 * => amap should be unlocked (we will lock it)
 * => to safely extend an amap it should have a reference count of
 *    one (thus it can't be shared)
 */
int
amap_extend(entry, addsize, flags)
	struct vm_map_entry *entry;
	vsize_t addsize;
	int flags;
{
	struct vm_amap *amap = entry->aref.ar_amap;
	int slotoff = entry->aref.ar_pageoff;
	int slotmapped, slotadd, slotneed, slotadded, slotalloc;
	int slotadj, slotspace;
#ifdef UVM_AMAP_PPREF
	int *newppref, *oldppref;
#endif
	int i, *newsl, *newbck, *oldsl, *oldbck;
	struct vm_anon **newover, **oldover;
	int mflag = (flags & AMAP_EXTEND_NOWAIT) ? M_NOWAIT :
	    (M_WAITOK | M_CANFAIL);

	UVMHIST_FUNC("amap_extend"); UVMHIST_CALLED(maphist);

	UVMHIST_LOG(maphist, "  (entry=0x%x, addsize=0x%x, flags=0x%x)",
	    entry, addsize, flags, 0);

	/*
	 * first, determine how many slots we need in the amap.  don't
	 * forget that ar_pageoff could be non-zero: this means that
	 * there are some unused slots before us in the amap.
	 */

	amap_lock(amap);
	KASSERT(amap_refs(amap) == 1); /* amap can't be shared */
	AMAP_B2SLOT(slotmapped, entry->end - entry->start); /* slots mapped */
	AMAP_B2SLOT(slotadd, addsize);			/* slots to add */
	if (flags & AMAP_EXTEND_FORWARDS) {
		slotneed = slotoff + slotmapped + slotadd;
		slotadj = 0;
		slotspace = 0;
	}
	else {
		slotneed = slotadd + slotmapped;
		slotadj = slotadd - slotoff;
		slotspace = amap->am_maxslot - slotmapped;
	}

	/*
	 * case 1: we already have enough slots in the map and thus
	 * only need to bump the reference counts on the slots we are
	 * adding.
	 */

	if (flags & AMAP_EXTEND_FORWARDS) {
		if (amap->am_nslot >= slotneed) {
#ifdef UVM_AMAP_PPREF
			if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
				amap_pp_adjref(amap, slotoff + slotmapped,
				    slotadd, 1);
			}
#endif
			amap_unlock(amap);
			UVMHIST_LOG(maphist,
			    "<- done (case 1f), amap = 0x%x, sltneed=%d",
			    amap, slotneed, 0, 0);
			return 0;
		}
	} else {
		if (slotadj <= 0) {
			slotoff -= slotadd;
			entry->aref.ar_pageoff = slotoff;
#ifdef UVM_AMAP_PPREF
			if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
				amap_pp_adjref(amap, slotoff, slotadd, 1);
			}
#endif
			amap_unlock(amap);
			UVMHIST_LOG(maphist,
			    "<- done (case 1b), amap = 0x%x, sltneed=%d",
			    amap, slotneed, 0, 0);
			return 0;
		}
	}

	/*
	 * case 2: we pre-allocated slots for use and we just need to
	 * bump nslot up to account for these slots.
	 */

	if (amap->am_maxslot >= slotneed) {
		if (flags & AMAP_EXTEND_FORWARDS) {
#ifdef UVM_AMAP_PPREF
			if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
				if ((slotoff + slotmapped) < amap->am_nslot)
					amap_pp_adjref(amap,
					    slotoff + slotmapped,
					    (amap->am_nslot -
					    (slotoff + slotmapped)), 1);
				pp_setreflen(amap->am_ppref, amap->am_nslot, 1,
				    slotneed - amap->am_nslot);
			}
#endif
			amap->am_nslot = slotneed;
			amap_unlock(amap);

			/*
			 * no need to zero am_anon since that was done at
			 * alloc time and we never shrink an allocation.
			 */

			UVMHIST_LOG(maphist,"<- done (case 2f), amap = 0x%x, "
			    "slotneed=%d", amap, slotneed, 0, 0);
			return 0;
		} else {
#ifdef UVM_AMAP_PPREF
			if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
				/*
				 * Slide up the ref counts on the pages that
				 * are actually in use.
				 */
				memmove(amap->am_ppref + slotspace,
				    amap->am_ppref + slotoff,
				    slotmapped * sizeof(int));
				/*
				 * Mark the (adjusted) gap at the front as
				 * referenced/not referenced.
				 */
				pp_setreflen(amap->am_ppref,
				    0, 0, slotspace - slotadd);
				pp_setreflen(amap->am_ppref,
				    slotspace - slotadd, 1, slotadd);
			}
#endif

			/*
			 * Slide the anon pointers up and clear out
			 * the space we just made.
			 */
			memmove(amap->am_anon + slotspace,
			    amap->am_anon + slotoff,
			    slotmapped * sizeof(struct vm_anon*));
			memset(amap->am_anon + slotoff, 0,
			    (slotspace - slotoff) * sizeof(struct vm_anon *));

			/*
			 * Slide the backpointers up, but don't bother
			 * wiping out the old slots.
			 */
			memmove(amap->am_bckptr + slotspace,
			    amap->am_bckptr + slotoff,
			    slotmapped * sizeof(int));

			/*
			 * Adjust all the useful active slot numbers.
			 */
			for (i = 0; i < amap->am_nused; i++)
				amap->am_slots[i] += (slotspace - slotoff);

			/*
			 * We just filled all the empty space in the
			 * front of the amap by activating a few new
			 * slots.
			 */
			amap->am_nslot = amap->am_maxslot;
			entry->aref.ar_pageoff = slotspace - slotadd;
			amap_unlock(amap);

			UVMHIST_LOG(maphist,"<- done (case 2b), amap = 0x%x, "
			    "slotneed=%d", amap, slotneed, 0, 0);
			return 0;
		}
	}

	/*
	 * case 3: we need to malloc a new amap and copy all the amap
	 * data over from old amap to the new one.
	 *
	 * note that the use of a kernel realloc() probably would not
	 * help here, since we wish to abort cleanly if one of the
	 * three (or four) mallocs fails.
	 */

	amap_unlock(amap);	/* unlock in case we sleep in malloc */
	slotalloc = malloc_roundup(slotneed * sizeof(int)) / sizeof(int);
#ifdef UVM_AMAP_PPREF
	newppref = NULL;
	if (amap->am_ppref && amap->am_ppref != PPREF_NONE)
		newppref = malloc(slotalloc * sizeof(int), M_UVMAMAP, mflag);
#endif
	newsl = malloc(slotalloc * sizeof(int), M_UVMAMAP, mflag);
	newbck = malloc(slotalloc * sizeof(int), M_UVMAMAP, mflag);
	newover = malloc(slotalloc * sizeof(struct vm_anon *), M_UVMAMAP,
	    mflag);
	if (newsl == NULL || newbck == NULL || newover == NULL) {
#ifdef UVM_AMAP_PPREF
		if (newppref != NULL) {
			free(newppref, M_UVMAMAP);
		}
#endif
		if (newsl != NULL) {
			free(newsl, M_UVMAMAP);
		}
		if (newbck != NULL) {
			free(newbck, M_UVMAMAP);
		}
		if (newover != NULL) {
			free(newover, M_UVMAMAP);
		}
		return ENOMEM;
	}
	amap_lock(amap);
	KASSERT(amap->am_maxslot < slotneed);

	/*
	 * now copy everything over to new malloc'd areas...
	 */

	slotadded = slotalloc - amap->am_nslot;
	if (!(flags & AMAP_EXTEND_FORWARDS))
		slotspace = slotalloc - slotmapped;

	/* do am_slots */
	oldsl = amap->am_slots;
	if (flags & AMAP_EXTEND_FORWARDS)
		memcpy(newsl, oldsl, sizeof(int) * amap->am_nused);
	else
		for (i = 0; i < amap->am_nused; i++)
			newsl[i] = oldsl[i] + slotspace - slotoff;
	amap->am_slots = newsl;

	/* do am_anon */
	oldover = amap->am_anon;
	if (flags & AMAP_EXTEND_FORWARDS) {
		memcpy(newover, oldover,
		    sizeof(struct vm_anon *) * amap->am_nslot);
		memset(newover + amap->am_nslot, 0,
		    sizeof(struct vm_anon *) * slotadded);
	} else {
		memcpy(newover + slotspace, oldover + slotoff,
		    sizeof(struct vm_anon *) * slotmapped);
		memset(newover, 0,
		    sizeof(struct vm_anon *) * slotspace);
	}
	amap->am_anon = newover;

	/* do am_bckptr */
	oldbck = amap->am_bckptr;
	if (flags & AMAP_EXTEND_FORWARDS)
		memcpy(newbck, oldbck, sizeof(int) * amap->am_nslot);
	else
		memcpy(newbck + slotspace, oldbck + slotoff,
		    sizeof(int) * slotmapped);
	amap->am_bckptr = newbck;

#ifdef UVM_AMAP_PPREF
	/* do ppref */
	oldppref = amap->am_ppref;
	if (newppref) {
		if (flags & AMAP_EXTEND_FORWARDS) {
			memcpy(newppref, oldppref,
			    sizeof(int) * amap->am_nslot);
			memset(newppref + amap->am_nslot, 0,
			    sizeof(int) * slotadded);
		} else {
			memcpy(newppref + slotspace, oldppref + slotoff,
			    sizeof(int) * slotmapped);
		}
		amap->am_ppref = newppref;
		if ((flags & AMAP_EXTEND_FORWARDS) &&
		    (slotoff + slotmapped) < amap->am_nslot)
			amap_pp_adjref(amap, slotoff + slotmapped,
			    (amap->am_nslot - (slotoff + slotmapped)), 1);
		if (flags & AMAP_EXTEND_FORWARDS)
			pp_setreflen(newppref, amap->am_nslot, 1,
			    slotneed - amap->am_nslot);
		else {
			pp_setreflen(newppref, 0, 0,
			    slotalloc - slotneed);
			pp_setreflen(newppref, slotalloc - slotneed, 1,
			    slotneed - slotmapped);
		}
	} else {
		if (amap->am_ppref)
			amap->am_ppref = PPREF_NONE;
	}
#endif

	/* update master values */
	if (flags & AMAP_EXTEND_FORWARDS)
		amap->am_nslot = slotneed;
	else {
		entry->aref.ar_pageoff = slotspace - slotadd;
		amap->am_nslot = slotalloc;
	}
	amap->am_maxslot = slotalloc;

	amap_unlock(amap);
	free(oldsl, M_UVMAMAP);
	free(oldbck, M_UVMAMAP);
	free(oldover, M_UVMAMAP);
#ifdef UVM_AMAP_PPREF
	if (oldppref && oldppref != PPREF_NONE)
		free(oldppref, M_UVMAMAP);
#endif
	UVMHIST_LOG(maphist,"<- done (case 3), amap = 0x%x, slotneed=%d",
	    amap, slotneed, 0, 0);
	return 0;
}
/*
 * amap_share_protect: change protection of anons in a shared amap
 *
 * for shared amaps, given the current data structure layout, it is
 * not possible for us to directly locate all maps referencing the
 * shared anon (to change the protection).  in order to protect data
 * in shared maps we use pmap_page_protect().  [this is useful for IPC
 * mechanisms like map entry passing that may want to write-protect
 * all mappings of a shared amap.]  we traverse am_anon or am_slots
 * depending on the current state of the amap.
 *
 * => entry's map and amap must be locked by the caller
 */
void
amap_share_protect(entry, prot)
	struct vm_map_entry *entry;
	vm_prot_t prot;
{
	struct vm_amap *amap = entry->aref.ar_amap;
	int slots, lcv, slot, stop;

	LOCK_ASSERT(simple_lock_held(&amap->am_l));

	AMAP_B2SLOT(slots, (entry->end - entry->start));
	stop = entry->aref.ar_pageoff + slots;

	if (slots < amap->am_nused) {
		/* cheaper to traverse am_anon */
		for (lcv = entry->aref.ar_pageoff ; lcv < stop ; lcv++) {
			if (amap->am_anon[lcv] == NULL)
				continue;
			if (amap->am_anon[lcv]->u.an_page != NULL)
				pmap_page_protect(amap->am_anon[lcv]->u.an_page,
				    prot);
		}
		return;
	}

	/* cheaper to traverse am_slots */
	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
		slot = amap->am_slots[lcv];
		if (slot < entry->aref.ar_pageoff || slot >= stop)
			continue;
		if (amap->am_anon[slot]->u.an_page != NULL)
			pmap_page_protect(amap->am_anon[slot]->u.an_page, prot);
	}
}

/*
 * amap_wipeout: wipeout all anon's in an amap; then free the amap!
 *
 * => called from amap_unref when the final reference to an amap is
 *	discarded (i.e. when reference count == 1)
 * => the amap should be locked (by the caller)
 */

void
amap_wipeout(amap)
	struct vm_amap *amap;
{
	int lcv, slot;
	struct vm_anon *anon;
	UVMHIST_FUNC("amap_wipeout"); UVMHIST_CALLED(maphist);
	UVMHIST_LOG(maphist,"(amap=0x%x)", amap, 0,0,0);

	amap_unlock(amap);
	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
		int refs;

		slot = amap->am_slots[lcv];
		anon = amap->am_anon[slot];

		if (anon == NULL || anon->an_ref == 0)
			panic("amap_wipeout: corrupt amap");

		simple_lock(&anon->an_lock);
		UVMHIST_LOG(maphist,"  processing anon 0x%x, ref=%d", anon,
		    anon->an_ref, 0, 0);
		refs = --anon->an_ref;
		simple_unlock(&anon->an_lock);
		if (refs == 0) {

			/*
			 * we had the last reference to a vm_anon. free it.
			 */

			uvm_anfree(anon);
		}

		/*
		 * XXX
		 * releasing the swap space held by N anons is an O(N^2)
		 * operation because of the implementation of extents.
		 * if there are many anons, tearing down an exiting process'
		 * address space can take many seconds, which causes very
		 * annoying pauses.  we yield here to give other processes
		 * a chance to run.  this should be removed once the
		 * performance of swap space management is improved.
		 */

		if (curlwp->l_cpu->ci_schedstate.spc_flags & SPCF_SHOULDYIELD)
			preempt(1);
	}

	/*
	 * now we free the map
	 */

	amap->am_ref = 0;	/* ... was one */
	amap->am_nused = 0;
	amap_free(amap);	/* will unlock and free amap */
	UVMHIST_LOG(maphist,"<- done!", 0,0,0,0);
}

/*
 * amap_copy: ensure that a map entry's "needs_copy" flag is false
 *	by copying the amap if necessary.
 *
 * => an entry with a null amap pointer will get a new (blank) one.
 * => the map that the map entry belongs to must be locked by caller.
 * => the amap currently attached to "entry" (if any) must be unlocked.
 * => if canchunk is true, then we may clip the entry into a chunk
 * => "startva" and "endva" are used only if canchunk is true.  they are
 *	used to limit chunking (e.g. if you have a large space that you
 *	know you are going to need to allocate amaps for, there is no point
 *	in allowing that to be chunked)
 */

void
amap_copy(map, entry, waitf, canchunk, startva, endva)
	struct vm_map *map;
	struct vm_map_entry *entry;
	int waitf;
	boolean_t canchunk;
	vaddr_t startva, endva;
{
	struct vm_amap *amap, *srcamap;
	int slots, lcv;
	vaddr_t chunksize;
	UVMHIST_FUNC("amap_copy"); UVMHIST_CALLED(maphist);
	UVMHIST_LOG(maphist, "  (map=%p, entry=%p, waitf=%d)",
	    map, entry, waitf, 0);

	/*
	 * is there a map to copy?  if not, create one from scratch.
	 */

	if (entry->aref.ar_amap == NULL) {

		/*
		 * check to see if we have a large amap that we can
		 * chunk.  we align startva/endva to chunk-sized
		 * boundaries and then clip to them.
		 */

		if (canchunk && atop(entry->end - entry->start) >=
		    UVM_AMAP_LARGE) {
			/* convert slots to bytes */
			chunksize = UVM_AMAP_CHUNK << PAGE_SHIFT;
			startva = (startva / chunksize) * chunksize;
			endva = roundup(endva, chunksize);
			UVMHIST_LOG(maphist, "  chunk amap ==> clip 0x%x->0x%x"
			    "to 0x%x->0x%x", entry->start, entry->end, startva,
			    endva);
			UVM_MAP_CLIP_START(map, entry, startva);
			/* watch out for endva wrap-around! */
			if (endva >= startva)
				UVM_MAP_CLIP_END(map, entry, endva);
		}

		UVMHIST_LOG(maphist, "<- done [creating new amap 0x%x->0x%x]",
		    entry->start, entry->end, 0, 0);
		entry->aref.ar_pageoff = 0;
		entry->aref.ar_amap = amap_alloc(entry->end - entry->start, 0,
		    waitf);
		if (entry->aref.ar_amap != NULL)
			entry->etype &= ~UVM_ET_NEEDSCOPY;
		return;
	}

	/*
	 * first check and see if we are the only map entry
	 * referencing the amap we currently have.  if so, then we can
	 * just take it over rather than copying it.  note that we are
	 * reading am_ref with the amap unlocked... the value can only
	 * be one if we have the only reference to the amap (via our
	 * locked map).  if we are greater than one we fall through to
	 * the next case (where we double check the value).
	 */

	if (entry->aref.ar_amap->am_ref == 1) {
		entry->etype &= ~UVM_ET_NEEDSCOPY;
		UVMHIST_LOG(maphist, "<- done [ref cnt = 1, took it over]",
		    0, 0, 0, 0);
		return;
	}

	/*
	 * looks like we need to copy the map.
	 */

	UVMHIST_LOG(maphist,"  amap=%p, ref=%d, must copy it",
	    entry->aref.ar_amap, entry->aref.ar_amap->am_ref, 0, 0);
	AMAP_B2SLOT(slots, entry->end - entry->start);
	amap = amap_alloc1(slots, 0, waitf);
	if (amap == NULL) {
		UVMHIST_LOG(maphist, "  amap_alloc1 failed", 0,0,0,0);
		return;
	}
	srcamap = entry->aref.ar_amap;
	amap_lock(srcamap);

	/*
	 * need to double check reference count now that we've got the
	 * src amap locked down.  the reference count could have
	 * changed while we were in malloc.  if the reference count
	 * dropped down to one we take over the old map rather than
	 * copying the amap.
	 */

	if (srcamap->am_ref == 1) {		/* take it over? */
		entry->etype &= ~UVM_ET_NEEDSCOPY;
		amap->am_ref--;		/* drop final reference to map */
		amap_free(amap);	/* dispose of new (unused) amap */
		amap_unlock(srcamap);
		return;
	}

	/*
	 * we must copy it now.
	 */

	UVMHIST_LOG(maphist, "  copying amap now",0, 0, 0, 0);
	for (lcv = 0 ; lcv < slots; lcv++) {
		amap->am_anon[lcv] =
		    srcamap->am_anon[entry->aref.ar_pageoff + lcv];
		if (amap->am_anon[lcv] == NULL)
			continue;
		simple_lock(&amap->am_anon[lcv]->an_lock);
		amap->am_anon[lcv]->an_ref++;
		simple_unlock(&amap->am_anon[lcv]->an_lock);
		amap->am_bckptr[lcv] = amap->am_nused;
		amap->am_slots[amap->am_nused] = lcv;
		amap->am_nused++;
	}
	memset(&amap->am_anon[lcv], 0,
	    (amap->am_maxslot - lcv) * sizeof(struct vm_anon *));

	/*
	 * drop our reference to the old amap (srcamap) and unlock.
	 * we know that the reference count on srcamap is greater than
	 * one (we checked above), so there is no way we could drop
	 * the count to zero.  [and no need to worry about freeing it]
	 */

	srcamap->am_ref--;
	if (srcamap->am_ref == 1 && (srcamap->am_flags & AMAP_SHARED) != 0)
		srcamap->am_flags &= ~AMAP_SHARED;   /* clear shared flag */
#ifdef UVM_AMAP_PPREF
	if (srcamap->am_ppref && srcamap->am_ppref != PPREF_NONE) {
		amap_pp_adjref(srcamap, entry->aref.ar_pageoff,
		    (entry->end - entry->start) >> PAGE_SHIFT, -1);
	}
#endif

	amap_unlock(srcamap);

	/*
	 * install new amap.
	 */

	entry->aref.ar_pageoff = 0;
	entry->aref.ar_amap = amap;
	entry->etype &= ~UVM_ET_NEEDSCOPY;
	UVMHIST_LOG(maphist, "<- done",0, 0, 0, 0);
}

/*
 * amap_cow_now: resolve all copy-on-write faults in an amap now for fork(2)
 *
 *	called during fork(2) when the parent process has a wired map
 *	entry.  in that case we want to avoid write-protecting pages
 *	in the parent's map (e.g. like what you'd do for a COW page)
 *	so we resolve the COW here.
 *
 * => assume parent's entry was wired, thus all pages are resident.
 * => assume pages that are loaned out (loan_count) are already mapped
 *	read-only in all maps, and thus no need for us to worry about them
 * => assume both parent and child vm_map's are locked
 * => caller passes child's map/entry in to us
 * => if we run out of memory we will unlock the amap and sleep _with_ the
 *	parent and child vm_map's locked(!).  we have to do this since
 *	we are in the middle of a fork(2) and we can't let the parent
 *	map change until we are done copying all the map entries.
 * => XXXCDC: out of memory should cause fork to fail, but there is
 *	currently no easy way to do this (needs fix)
 * => page queues must be unlocked (we may lock them)
 */

void
amap_cow_now(map, entry)
	struct vm_map *map;
	struct vm_map_entry *entry;
{
	struct vm_amap *amap = entry->aref.ar_amap;
	int lcv, slot;
	struct vm_anon *anon, *nanon;
	struct vm_page *pg, *npg;

	/*
	 * note that if we unlock the amap then we must ReStart the "lcv" for
	 * loop because some other process could reorder the anon's in the
	 * am_anon[] array on us while the lock is dropped.
	 */

ReStart:
	amap_lock(amap);

	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {

		/*
		 * get the page
		 */

		slot = amap->am_slots[lcv];
		anon = amap->am_anon[slot];
		simple_lock(&anon->an_lock);
		pg = anon->u.an_page;

		/*
		 * page must be resident since parent is wired
		 */

		if (pg == NULL)
			panic("amap_cow_now: non-resident wired page in anon %p",
			    anon);

		/*
		 * if the anon ref count is one and the page is not loaned,
		 * then we are safe (the child has exclusive access to the
		 * page).  if the page is loaned, then it must already be
		 * mapped read-only.
		 *
		 * we only need to get involved when these are not true.
		 * [note: if loan_count == 0, then the anon must own the page]
		 */

		if (anon->an_ref > 1 && pg->loan_count == 0) {

			/*
			 * if the page is busy then we have to unlock, wait for
			 * it and then restart.
			 */
			if (pg->flags & PG_BUSY) {
				pg->flags |= PG_WANTED;
				amap_unlock(amap);
				UVM_UNLOCK_AND_WAIT(pg, &anon->an_lock, FALSE,
				    "cownow", 0);
				goto ReStart;
			}

			/*
			 * ok, time to do a copy-on-write to a new anon
			 */
			nanon = uvm_analloc();
			if (nanon) {
				/* nanon is locked! */
				npg = uvm_pagealloc(NULL, 0, nanon, 0);
			} else
				npg = NULL;	/* XXX: quiet gcc warning */

			if (nanon == NULL || npg == NULL) {
				/* out of memory */
				/*
				 * XXXCDC: we should cause fork to fail, but
				 * we can't ...
				 */
				if (nanon) {
					nanon->an_ref--;
					simple_unlock(&nanon->an_lock);
					uvm_anfree(nanon);
				}
				simple_unlock(&anon->an_lock);
				amap_unlock(amap);
				uvm_wait("cownowpage");
				goto ReStart;
			}

			/*
			 * got it... now we can copy the data and replace anon
			 * with our new one...
			 */

			uvm_pagecopy(pg, npg);		/* old -> new */
			anon->an_ref--;			/* can't drop to zero */
			amap->am_anon[slot] = nanon;	/* replace */

			/*
			 * drop PG_BUSY on new page ... since we have had its
			 * owner locked the whole time it can't be
			 * PG_RELEASED | PG_WANTED.
			 */

			uvm_lock_pageq();
			uvm_pageactivate(npg);
			uvm_unlock_pageq();
			npg->flags &= ~(PG_BUSY|PG_FAKE);
			UVM_PAGE_OWN(npg, NULL);
			simple_unlock(&nanon->an_lock);
		}
		simple_unlock(&anon->an_lock);
	}
	amap_unlock(amap);
}

/*
 * amap_splitref: split a single reference into two separate references
 *
 * => called from uvm_map's clip routines
 * => origref's map should be locked
 * => origref->ar_amap should be unlocked (we will lock)
 */
void
amap_splitref(origref, splitref, offset)
	struct vm_aref *origref, *splitref;
	vaddr_t offset;
{
	int leftslots;

	AMAP_B2SLOT(leftslots, offset);
	if (leftslots == 0)
		panic("amap_splitref: split at zero offset");

	amap_lock(origref->ar_amap);

	/*
	 * now: the amap is locked, so it is safe to sanity-check the split.
	 */

	if (origref->ar_amap->am_nslot - origref->ar_pageoff - leftslots <= 0)
		panic("amap_splitref: map size check failed");

#ifdef UVM_AMAP_PPREF
	/*
	 * establish ppref before we add a duplicate reference to the amap
	 */
	if (origref->ar_amap->am_ppref == NULL)
		amap_pp_establish(origref->ar_amap, origref->ar_pageoff);
#endif

	splitref->ar_amap = origref->ar_amap;
	splitref->ar_amap->am_ref++;		/* not a share reference */
	splitref->ar_pageoff = origref->ar_pageoff + leftslots;

	amap_unlock(origref->ar_amap);
}

#ifdef UVM_AMAP_PPREF

/*
 * amap_pp_establish: add a ppref array to an amap, if possible
 *
 * => amap locked by caller
 */
void
amap_pp_establish(amap, offset)
	struct vm_amap *amap;
	vaddr_t offset;
{
	amap->am_ppref = malloc(sizeof(int) * amap->am_maxslot,
	    M_UVMAMAP, M_NOWAIT);

	/*
	 * if we fail then we just won't use ppref for this amap
	 */

	if (amap->am_ppref == NULL) {
		amap->am_ppref = PPREF_NONE;	/* not using it */
		return;
	}
	memset(amap->am_ppref, 0, sizeof(int) * amap->am_maxslot);
	pp_setreflen(amap->am_ppref, 0, 0, offset);
	pp_setreflen(amap->am_ppref, offset, amap->am_ref,
	    amap->am_nslot - offset);
	return;
}

/*
 * amap_pp_adjref: adjust reference count to a part of an amap using the
 * per-page reference count array.
 *
 * => map and amap locked by caller
 * => caller must check that ppref != PPREF_NONE before calling
 */
void
amap_pp_adjref(amap, curslot, slotlen, adjval)
	struct vm_amap *amap;
	int curslot;
	vsize_t slotlen;
	int adjval;
{
	int stopslot, *ppref, lcv, prevlcv;
	int ref, len, prevref, prevlen;

	stopslot = curslot + slotlen;
	ppref = amap->am_ppref;
	prevlcv = 0;

	/*
	 * first advance to the correct place in the ppref array,
	 * fragment if needed.
	 */

	for (lcv = 0 ; lcv < curslot ; lcv += len) {
		pp_getreflen(ppref, lcv, &ref, &len);
		if (lcv + len > curslot) {	/* goes past start? */
			pp_setreflen(ppref, lcv, ref, curslot - lcv);
			pp_setreflen(ppref, curslot, ref, len - (curslot - lcv));
			len = curslot - lcv;	/* new length of entry @ lcv */
		}
		prevlcv = lcv;
	}
	if (lcv != 0)
		pp_getreflen(ppref, prevlcv, &prevref, &prevlen);
	else {
		/* Ensure that the "prevref == ref" test below always
		 * fails, since we're starting from the beginning of
		 * the ppref array; that is, there is no previous
		 * chunk.
		 */
		prevref = -1;
		prevlen = 0;
	}

	/*
	 * now adjust reference counts in range.  merge the first
	 * changed entry with the last unchanged entry if possible.
	 */

	if (lcv != curslot)
		panic("amap_pp_adjref: overshot target");

	for (/* lcv already set */; lcv < stopslot ; lcv += len) {
		pp_getreflen(ppref, lcv, &ref, &len);
		if (lcv + len > stopslot) {	/* goes past end? */
			pp_setreflen(ppref, lcv, ref, stopslot - lcv);
			pp_setreflen(ppref, stopslot, ref,
			    len - (stopslot - lcv));
			len = stopslot - lcv;
		}
		ref += adjval;
		if (ref < 0)
			panic("amap_pp_adjref: negative reference count");
		if (lcv == prevlcv + prevlen && ref == prevref) {
			pp_setreflen(ppref, prevlcv, ref, prevlen + len);
		} else {
			pp_setreflen(ppref, lcv, ref, len);
		}
		if (ref == 0)
			amap_wiperange(amap, lcv, len);
	}

}
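
/*
 * Illustrative sketch (not compiled into the kernel): the effect of
 * amap_pp_adjref() on the ppref array.  It assumes an amap with at
 * least 8 slots whose ppref array is set up and whose locks are held,
 * as the real callers arrange.  Eight slots start at ref 1; adding a
 * reference to slots 2-5 only (as a partial reference would) splits
 * the single chunk into three.  "pp_adjref_example" is a name
 * invented for this sketch.
 */
#if 0
static void
pp_adjref_example(struct vm_amap *amap)
{
	int ref, len;

	pp_setreflen(amap->am_ppref, 0, 1, 8);	/* one chunk: ref 1, len 8 */

	amap_pp_adjref(amap, 2, 4, 1);		/* +1 on slots 2..5 */

	pp_getreflen(amap->am_ppref, 0, &ref, &len);	/* ref 1, len 2 */
	pp_getreflen(amap->am_ppref, 2, &ref, &len);	/* ref 2, len 4 */
	pp_getreflen(amap->am_ppref, 6, &ref, &len);	/* ref 1, len 2 */
}
#endif
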
/*
 * amap_wiperange: wipe out a range of an amap
 * [different from amap_wipeout because the amap is kept intact]
 *
 * => both map and amap must be locked by caller.
 */
void
amap_wiperange(amap, slotoff, slots)
	struct vm_amap *amap;
	int slotoff, slots;
{
	int byanon, lcv, stop, curslot, ptr, slotend;
	struct vm_anon *anon;

	/*
	 * we can either traverse the amap by am_anon or by am_slots depending
	 * on which is cheaper.  decide now.
	 */

	if (slots < amap->am_nused) {
		byanon = TRUE;
		lcv = slotoff;
		stop = slotoff + slots;
		slotend = 0;
	} else {
		byanon = FALSE;
		lcv = 0;
		stop = amap->am_nused;
		slotend = slotoff + slots;
	}

	while (lcv < stop) {
		int refs;

		if (byanon) {
			curslot = lcv++;	/* lcv advances here */
			if (amap->am_anon[curslot] == NULL)
				continue;
		} else {
			curslot = amap->am_slots[lcv];
			if (curslot < slotoff || curslot >= slotend) {
				lcv++;		/* lcv advances here */
				continue;
			}
			stop--;	/* drop stop, since anon will be removed */
		}
		anon = amap->am_anon[curslot];

		/*
		 * remove it from the amap
		 */

		amap->am_anon[curslot] = NULL;
		ptr = amap->am_bckptr[curslot];
		if (ptr != (amap->am_nused - 1)) {
			amap->am_slots[ptr] =
			    amap->am_slots[amap->am_nused - 1];
			amap->am_bckptr[amap->am_slots[ptr]] =
			    ptr;	/* back ptr. */
		}
		amap->am_nused--;

		/*
		 * drop anon reference count
		 */

		simple_lock(&anon->an_lock);
		refs = --anon->an_ref;
		simple_unlock(&anon->an_lock);
		if (refs == 0) {

			/*
			 * we just eliminated the last reference to an anon.
			 * free it.
			 */

			uvm_anfree(anon);
		}
	}
}

#endif
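
/*
 * Illustrative sketch (not compiled into the kernel): the
 * am_slots/am_bckptr bookkeeping that amap_wipeout() and
 * amap_wiperange() rely on.  am_slots[0 .. am_nused-1] lists the slot
 * numbers that currently hold an anon, in no particular order, and
 * am_bckptr[slot] records where a slot sits in that list, so a slot
 * can be removed in constant time by swapping the last list entry
 * into its place.  "amap_slot_remove_example" is a name invented for
 * this sketch; the amap is assumed to be locked and the slot in use.
 */
#if 0
static void
amap_slot_remove_example(struct vm_amap *amap, int slot)
{
	int ptr = amap->am_bckptr[slot];

	amap->am_anon[slot] = NULL;
	if (ptr != (amap->am_nused - 1)) {
		/* move the last active entry into the vacated list position */
		amap->am_slots[ptr] = amap->am_slots[amap->am_nused - 1];
		amap->am_bckptr[amap->am_slots[ptr]] = ptr;
	}
	amap->am_nused--;
}
#endif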