1 /* $NetBSD: uvm_amap.c,v 1.40 2001/12/05 01:33:09 enami Exp $ */ 2 3 /* 4 * 5 * Copyright (c) 1997 Charles D. Cranor and Washington University. 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by Charles D. Cranor and 19 * Washington University. 20 * 4. The name of the author may not be used to endorse or promote products 21 * derived from this software without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 24 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 25 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 26 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 27 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 28 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 29 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 30 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 31 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 32 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 33 */ 34 35 /* 36 * uvm_amap.c: amap operations 37 */ 38 39 /* 40 * this file contains functions that perform operations on amaps. see 41 * uvm_amap.h for a brief explanation of the role of amaps in uvm. 42 */ 43 44 #include <sys/cdefs.h> 45 __KERNEL_RCSID(0, "$NetBSD: uvm_amap.c,v 1.40 2001/12/05 01:33:09 enami Exp $"); 46 47 #undef UVM_AMAP_INLINE /* enable/disable amap inlines */ 48 49 #include "opt_uvmhist.h" 50 51 #include <sys/param.h> 52 #include <sys/systm.h> 53 #include <sys/proc.h> 54 #include <sys/malloc.h> 55 #include <sys/kernel.h> 56 #include <sys/pool.h> 57 58 #define UVM_AMAP_C /* ensure disabled inlines are in */ 59 #include <uvm/uvm.h> 60 #include <uvm/uvm_swap.h> 61 62 /* 63 * pool for allocation of vm_map structures. note that the pool has 64 * its own simplelock for its protection. also note that in order to 65 * avoid an endless loop, the amap pool's allocator cannot allocate 66 * memory from an amap (it currently goes through the kernel uobj, so 67 * we are ok). 68 */ 69 70 struct pool uvm_amap_pool; 71 72 /* 73 * local functions 74 */ 75 76 static struct vm_amap *amap_alloc1 __P((int, int, int)); 77 78 #ifdef UVM_AMAP_PPREF 79 /* 80 * what is ppref? ppref is an _optional_ amap feature which is used 81 * to keep track of reference counts on a per-page basis. it is enabled 82 * when UVM_AMAP_PPREF is defined. 83 * 84 * when enabled, an array of ints is allocated for the pprefs. this 85 * array is allocated only when a partial reference is added to the 86 * map (either by unmapping part of the amap, or gaining a reference 87 * to only a part of an amap). if the malloc of the array fails 88 * (M_NOWAIT), then we set the array pointer to PPREF_NONE to indicate 89 * that we tried to do ppref's but couldn't alloc the array so just 90 * give up (after all, this is an optional feature!). 91 * 92 * the array is divided into page sized "chunks." for chunks of length 1, 93 * the chunk reference count plus one is stored in that chunk's slot. 94 * for chunks of length > 1 the first slot contains (the reference count 95 * plus one) * -1. [the negative value indicates that the length is 96 * greater than one.] the second slot of the chunk contains the length 97 * of the chunk. here is an example: 98 * 99 * actual REFS: 2 2 2 2 3 1 1 0 0 0 4 4 0 1 1 1 100 * ppref: -3 4 x x 4 -2 2 -1 3 x -5 2 1 -2 3 x 101 * <----------><-><----><-------><----><-><-------> 102 * (x = don't care) 103 * 104 * this allows us to allow one int to contain the ref count for the whole 105 * chunk. note that the "plus one" part is needed because a reference 106 * count of zero is neither positive or negative (need a way to tell 107 * if we've got one zero or a bunch of them). 108 * 109 * here are some in-line functions to help us. 110 */ 111 112 static __inline void pp_getreflen __P((int *, int, int *, int *)); 113 static __inline void pp_setreflen __P((int *, int, int, int)); 114 115 /* 116 * pp_getreflen: get the reference and length for a specific offset 117 * 118 * => ppref's amap must be locked 119 */ 120 static __inline void 121 pp_getreflen(ppref, offset, refp, lenp) 122 int *ppref, offset, *refp, *lenp; 123 { 124 125 if (ppref[offset] > 0) { /* chunk size must be 1 */ 126 *refp = ppref[offset] - 1; /* don't forget to adjust */ 127 *lenp = 1; 128 } else { 129 *refp = (ppref[offset] * -1) - 1; 130 *lenp = ppref[offset+1]; 131 } 132 } 133 134 /* 135 * pp_setreflen: set the reference and length for a specific offset 136 * 137 * => ppref's amap must be locked 138 */ 139 static __inline void 140 pp_setreflen(ppref, offset, ref, len) 141 int *ppref, offset, ref, len; 142 { 143 if (len == 1) { 144 ppref[offset] = ref + 1; 145 } else { 146 ppref[offset] = (ref + 1) * -1; 147 ppref[offset+1] = len; 148 } 149 } 150 #endif 151 152 /* 153 * amap_init: called at boot time to init global amap data structures 154 */ 155 156 void 157 amap_init(void) 158 { 159 160 /* 161 * Initialize the vm_amap pool. 162 */ 163 164 pool_init(&uvm_amap_pool, sizeof(struct vm_amap), 0, 0, 0, 165 "amappl", 0, pool_page_alloc_nointr, pool_page_free_nointr, 166 M_UVMAMAP); 167 } 168 169 /* 170 * amap_alloc1: internal function that allocates an amap, but does not 171 * init the overlay. 172 * 173 * => lock on returned amap is init'd 174 */ 175 static inline struct vm_amap * 176 amap_alloc1(slots, padslots, waitf) 177 int slots, padslots, waitf; 178 { 179 struct vm_amap *amap; 180 int totalslots; 181 182 amap = pool_get(&uvm_amap_pool, (waitf == M_WAITOK) ? PR_WAITOK : 0); 183 if (amap == NULL) 184 return(NULL); 185 186 totalslots = malloc_roundup((slots + padslots) * sizeof(int)) / 187 sizeof(int); 188 simple_lock_init(&amap->am_l); 189 amap->am_ref = 1; 190 amap->am_flags = 0; 191 #ifdef UVM_AMAP_PPREF 192 amap->am_ppref = NULL; 193 #endif 194 amap->am_maxslot = totalslots; 195 amap->am_nslot = slots; 196 amap->am_nused = 0; 197 198 amap->am_slots = malloc(totalslots * sizeof(int), M_UVMAMAP, 199 waitf); 200 if (amap->am_slots == NULL) 201 goto fail1; 202 203 amap->am_bckptr = malloc(totalslots * sizeof(int), M_UVMAMAP, waitf); 204 if (amap->am_bckptr == NULL) 205 goto fail2; 206 207 amap->am_anon = malloc(totalslots * sizeof(struct vm_anon *), 208 M_UVMAMAP, waitf); 209 if (amap->am_anon == NULL) 210 goto fail3; 211 212 return(amap); 213 214 fail3: 215 free(amap->am_bckptr, M_UVMAMAP); 216 fail2: 217 free(amap->am_slots, M_UVMAMAP); 218 fail1: 219 pool_put(&uvm_amap_pool, amap); 220 return (NULL); 221 } 222 223 /* 224 * amap_alloc: allocate an amap to manage "sz" bytes of anonymous VM 225 * 226 * => caller should ensure sz is a multiple of PAGE_SIZE 227 * => reference count to new amap is set to one 228 * => new amap is returned unlocked 229 */ 230 231 struct vm_amap * 232 amap_alloc(sz, padsz, waitf) 233 vaddr_t sz, padsz; 234 int waitf; 235 { 236 struct vm_amap *amap; 237 int slots, padslots; 238 UVMHIST_FUNC("amap_alloc"); UVMHIST_CALLED(maphist); 239 240 AMAP_B2SLOT(slots, sz); 241 AMAP_B2SLOT(padslots, padsz); 242 243 amap = amap_alloc1(slots, padslots, waitf); 244 if (amap) 245 memset(amap->am_anon, 0, 246 amap->am_maxslot * sizeof(struct vm_anon *)); 247 248 UVMHIST_LOG(maphist,"<- done, amap = 0x%x, sz=%d", amap, sz, 0, 0); 249 return(amap); 250 } 251 252 253 /* 254 * amap_free: free an amap 255 * 256 * => the amap must be unlocked 257 * => the amap should have a zero reference count and be empty 258 */ 259 void 260 amap_free(amap) 261 struct vm_amap *amap; 262 { 263 UVMHIST_FUNC("amap_free"); UVMHIST_CALLED(maphist); 264 265 KASSERT(amap->am_ref == 0 && amap->am_nused == 0); 266 LOCK_ASSERT(!simple_lock_held(&amap->am_l)); 267 free(amap->am_slots, M_UVMAMAP); 268 free(amap->am_bckptr, M_UVMAMAP); 269 free(amap->am_anon, M_UVMAMAP); 270 #ifdef UVM_AMAP_PPREF 271 if (amap->am_ppref && amap->am_ppref != PPREF_NONE) 272 free(amap->am_ppref, M_UVMAMAP); 273 #endif 274 pool_put(&uvm_amap_pool, amap); 275 UVMHIST_LOG(maphist,"<- done, freed amap = 0x%x", amap, 0, 0, 0); 276 } 277 278 /* 279 * amap_extend: extend the size of an amap (if needed) 280 * 281 * => called from uvm_map when we want to extend an amap to cover 282 * a new mapping (rather than allocate a new one) 283 * => amap should be unlocked (we will lock it) 284 * => to safely extend an amap it should have a reference count of 285 * one (thus it can't be shared) 286 * => XXXCDC: needs a waitflag or failure return value? 287 * => XXXCDC: support padding at this level? 288 */ 289 void 290 amap_extend(entry, addsize) 291 struct vm_map_entry *entry; 292 vsize_t addsize; 293 { 294 struct vm_amap *amap = entry->aref.ar_amap; 295 int slotoff = entry->aref.ar_pageoff; 296 int slotmapped, slotadd, slotneed, slotadded, slotalloc; 297 #ifdef UVM_AMAP_PPREF 298 int *newppref, *oldppref; 299 #endif 300 int *newsl, *newbck, *oldsl, *oldbck; 301 struct vm_anon **newover, **oldover; 302 UVMHIST_FUNC("amap_extend"); UVMHIST_CALLED(maphist); 303 304 UVMHIST_LOG(maphist, " (entry=0x%x, addsize=0x%x)", entry,addsize,0,0); 305 306 /* 307 * first, determine how many slots we need in the amap. don't 308 * forget that ar_pageoff could be non-zero: this means that 309 * there are some unused slots before us in the amap. 310 */ 311 312 amap_lock(amap); /* lock! */ 313 314 AMAP_B2SLOT(slotmapped, entry->end - entry->start); /* slots mapped */ 315 AMAP_B2SLOT(slotadd, addsize); /* slots to add */ 316 slotneed = slotoff + slotmapped + slotadd; 317 318 /* 319 * case 1: we already have enough slots in the map and thus 320 * only need to bump the reference counts on the slots we are 321 * adding. 322 */ 323 324 if (amap->am_nslot >= slotneed) { 325 #ifdef UVM_AMAP_PPREF 326 if (amap->am_ppref && amap->am_ppref != PPREF_NONE) { 327 amap_pp_adjref(amap, slotoff + slotmapped, slotadd, 1); 328 } 329 #endif 330 amap_unlock(amap); 331 UVMHIST_LOG(maphist,"<- done (case 1), amap = 0x%x, sltneed=%d", 332 amap, slotneed, 0, 0); 333 return; /* done! */ 334 } 335 336 /* 337 * case 2: we pre-allocated slots for use and we just need to 338 * bump nslot up to take account for these slots. 339 */ 340 if (amap->am_maxslot >= slotneed) { 341 #ifdef UVM_AMAP_PPREF 342 if (amap->am_ppref && amap->am_ppref != PPREF_NONE) { 343 if ((slotoff + slotmapped) < amap->am_nslot) 344 amap_pp_adjref(amap, slotoff + slotmapped, 345 (amap->am_nslot - (slotoff + slotmapped)), 346 1); 347 pp_setreflen(amap->am_ppref, amap->am_nslot, 1, 348 slotneed - amap->am_nslot); 349 } 350 #endif 351 amap->am_nslot = slotneed; 352 amap_unlock(amap); 353 /* 354 * no need to zero am_anon since that was done at 355 * alloc time and we never shrink an allocation. 356 */ 357 UVMHIST_LOG(maphist,"<- done (case 2), amap = 0x%x, slotneed=%d", 358 amap, slotneed, 0, 0); 359 return; 360 } 361 362 /* 363 * case 3: we need to malloc a new amap and copy all the amap 364 * data over from old amap to the new one. 365 * 366 * XXXCDC: could we take advantage of a kernel realloc()? 367 */ 368 369 amap_unlock(amap); /* unlock in case we sleep in malloc */ 370 slotalloc = malloc_roundup(slotneed * sizeof(int)) / sizeof(int); 371 #ifdef UVM_AMAP_PPREF 372 newppref = NULL; 373 if (amap->am_ppref && amap->am_ppref != PPREF_NONE) { 374 newppref = malloc(slotalloc * sizeof(int), M_UVMAMAP, 375 M_NOWAIT); 376 if (newppref == NULL) { 377 /* give up if malloc fails */ 378 free(amap->am_ppref, M_UVMAMAP); 379 amap->am_ppref = PPREF_NONE; 380 } 381 } 382 #endif 383 newsl = malloc(slotalloc * sizeof(int), M_UVMAMAP, M_WAITOK); 384 newbck = malloc(slotalloc * sizeof(int), M_UVMAMAP, M_WAITOK); 385 newover = malloc(slotalloc * sizeof(struct vm_anon *), 386 M_UVMAMAP, M_WAITOK); 387 amap_lock(amap); /* re-lock! */ 388 KASSERT(amap->am_maxslot < slotneed); 389 390 /* 391 * now copy everything over to new malloc'd areas... 392 */ 393 394 slotadded = slotalloc - amap->am_nslot; 395 396 /* do am_slots */ 397 oldsl = amap->am_slots; 398 memcpy(newsl, oldsl, sizeof(int) * amap->am_nused); 399 amap->am_slots = newsl; 400 401 /* do am_anon */ 402 oldover = amap->am_anon; 403 memcpy(newover, oldover, sizeof(struct vm_anon *) * amap->am_nslot); 404 memset(newover + amap->am_nslot, 0, sizeof(struct vm_anon *) * slotadded); 405 amap->am_anon = newover; 406 407 /* do am_bckptr */ 408 oldbck = amap->am_bckptr; 409 memcpy(newbck, oldbck, sizeof(int) * amap->am_nslot); 410 amap->am_bckptr = newbck; 411 412 #ifdef UVM_AMAP_PPREF 413 /* do ppref */ 414 oldppref = amap->am_ppref; 415 if (newppref) { 416 memcpy(newppref, oldppref, sizeof(int) * amap->am_nslot); 417 memset(newppref + amap->am_nslot, 0, sizeof(int) * slotadded); 418 amap->am_ppref = newppref; 419 if ((slotoff + slotmapped) < amap->am_nslot) 420 amap_pp_adjref(amap, slotoff + slotmapped, 421 (amap->am_nslot - (slotoff + slotmapped)), 1); 422 pp_setreflen(newppref, amap->am_nslot, 1, 423 slotneed - amap->am_nslot); 424 } 425 #endif 426 427 /* update master values */ 428 amap->am_nslot = slotneed; 429 amap->am_maxslot = slotalloc; 430 431 amap_unlock(amap); 432 free(oldsl, M_UVMAMAP); 433 free(oldbck, M_UVMAMAP); 434 free(oldover, M_UVMAMAP); 435 #ifdef UVM_AMAP_PPREF 436 if (oldppref && oldppref != PPREF_NONE) 437 free(oldppref, M_UVMAMAP); 438 #endif 439 UVMHIST_LOG(maphist,"<- done (case 3), amap = 0x%x, slotneed=%d", 440 amap, slotneed, 0, 0); 441 } 442 443 /* 444 * amap_share_protect: change protection of anons in a shared amap 445 * 446 * for shared amaps, given the current data structure layout, it is 447 * not possible for us to directly locate all maps referencing the 448 * shared anon (to change the protection). in order to protect data 449 * in shared maps we use pmap_page_protect(). [this is useful for IPC 450 * mechanisms like map entry passing that may want to write-protect 451 * all mappings of a shared amap.] we traverse am_anon or am_slots 452 * depending on the current state of the amap. 453 * 454 * => entry's map and amap must be locked by the caller 455 */ 456 void 457 amap_share_protect(entry, prot) 458 struct vm_map_entry *entry; 459 vm_prot_t prot; 460 { 461 struct vm_amap *amap = entry->aref.ar_amap; 462 int slots, lcv, slot, stop; 463 464 LOCK_ASSERT(simple_lock_held(&amap->am_l)); 465 466 AMAP_B2SLOT(slots, (entry->end - entry->start)); 467 stop = entry->aref.ar_pageoff + slots; 468 469 if (slots < amap->am_nused) { 470 /* cheaper to traverse am_anon */ 471 for (lcv = entry->aref.ar_pageoff ; lcv < stop ; lcv++) { 472 if (amap->am_anon[lcv] == NULL) 473 continue; 474 if (amap->am_anon[lcv]->u.an_page != NULL) 475 pmap_page_protect(amap->am_anon[lcv]->u.an_page, 476 prot); 477 } 478 return; 479 } 480 481 /* cheaper to traverse am_slots */ 482 for (lcv = 0 ; lcv < amap->am_nused ; lcv++) { 483 slot = amap->am_slots[lcv]; 484 if (slot < entry->aref.ar_pageoff || slot >= stop) 485 continue; 486 if (amap->am_anon[slot]->u.an_page != NULL) 487 pmap_page_protect(amap->am_anon[slot]->u.an_page, prot); 488 } 489 } 490 491 /* 492 * amap_wipeout: wipeout all anon's in an amap; then free the amap! 493 * 494 * => called from amap_unref when the final reference to an amap is 495 * discarded (i.e. when reference count == 1) 496 * => the amap should be locked (by the caller) 497 */ 498 499 void 500 amap_wipeout(amap) 501 struct vm_amap *amap; 502 { 503 int lcv, slot; 504 struct vm_anon *anon; 505 UVMHIST_FUNC("amap_wipeout"); UVMHIST_CALLED(maphist); 506 UVMHIST_LOG(maphist,"(amap=0x%x)", amap, 0,0,0); 507 508 amap_unlock(amap); 509 for (lcv = 0 ; lcv < amap->am_nused ; lcv++) { 510 int refs; 511 512 slot = amap->am_slots[lcv]; 513 anon = amap->am_anon[slot]; 514 515 if (anon == NULL || anon->an_ref == 0) 516 panic("amap_wipeout: corrupt amap"); 517 518 simple_lock(&anon->an_lock); 519 UVMHIST_LOG(maphist," processing anon 0x%x, ref=%d", anon, 520 anon->an_ref, 0, 0); 521 refs = --anon->an_ref; 522 simple_unlock(&anon->an_lock); 523 if (refs == 0) { 524 525 /* 526 * we had the last reference to a vm_anon. free it. 527 */ 528 529 uvm_anfree(anon); 530 } 531 532 /* 533 * XXX 534 * releasing the swap space held by an N anons is an O(N^2) 535 * operation because of the implementation of extents. 536 * if there are many anons, tearing down an exiting process' 537 * address space can take many seconds, which causes very 538 * annoying pauses. we yield here to give other processes 539 * a chance to run. this should be removed once the performance 540 * of swap space management is improved. 541 */ 542 543 if (curproc->p_cpu->ci_schedstate.spc_flags & SPCF_SHOULDYIELD) 544 preempt(NULL); 545 } 546 547 /* 548 * now we free the map 549 */ 550 551 amap->am_ref = 0; /* ... was one */ 552 amap->am_nused = 0; 553 amap_free(amap); /* will unlock and free amap */ 554 UVMHIST_LOG(maphist,"<- done!", 0,0,0,0); 555 } 556 557 /* 558 * amap_copy: ensure that a map entry's "needs_copy" flag is false 559 * by copying the amap if necessary. 560 * 561 * => an entry with a null amap pointer will get a new (blank) one. 562 * => the map that the map entry belongs to must be locked by caller. 563 * => the amap currently attached to "entry" (if any) must be unlocked. 564 * => if canchunk is true, then we may clip the entry into a chunk 565 * => "startva" and "endva" are used only if canchunk is true. they are 566 * used to limit chunking (e.g. if you have a large space that you 567 * know you are going to need to allocate amaps for, there is no point 568 * in allowing that to be chunked) 569 */ 570 571 void 572 amap_copy(map, entry, waitf, canchunk, startva, endva) 573 struct vm_map *map; 574 struct vm_map_entry *entry; 575 int waitf; 576 boolean_t canchunk; 577 vaddr_t startva, endva; 578 { 579 struct vm_amap *amap, *srcamap; 580 int slots, lcv; 581 vaddr_t chunksize; 582 UVMHIST_FUNC("amap_copy"); UVMHIST_CALLED(maphist); 583 UVMHIST_LOG(maphist, " (map=%p, entry=%p, waitf=%d)", 584 map, entry, waitf, 0); 585 586 /* 587 * is there a map to copy? if not, create one from scratch. 588 */ 589 590 if (entry->aref.ar_amap == NULL) { 591 592 /* 593 * check to see if we have a large amap that we can 594 * chunk. we align startva/endva to chunk-sized 595 * boundaries and then clip to them. 596 */ 597 598 if (canchunk && atop(entry->end - entry->start) >= 599 UVM_AMAP_LARGE) { 600 /* convert slots to bytes */ 601 chunksize = UVM_AMAP_CHUNK << PAGE_SHIFT; 602 startva = (startva / chunksize) * chunksize; 603 endva = roundup(endva, chunksize); 604 UVMHIST_LOG(maphist, " chunk amap ==> clip 0x%x->0x%x" 605 "to 0x%x->0x%x", entry->start, entry->end, startva, 606 endva); 607 UVM_MAP_CLIP_START(map, entry, startva); 608 /* watch out for endva wrap-around! */ 609 if (endva >= startva) 610 UVM_MAP_CLIP_END(map, entry, endva); 611 } 612 613 UVMHIST_LOG(maphist, "<- done [creating new amap 0x%x->0x%x]", 614 entry->start, entry->end, 0, 0); 615 entry->aref.ar_pageoff = 0; 616 entry->aref.ar_amap = amap_alloc(entry->end - entry->start, 0, 617 waitf); 618 if (entry->aref.ar_amap != NULL) 619 entry->etype &= ~UVM_ET_NEEDSCOPY; 620 return; 621 } 622 623 /* 624 * first check and see if we are the only map entry 625 * referencing the amap we currently have. if so, then we can 626 * just take it over rather than copying it. note that we are 627 * reading am_ref with the amap unlocked... the value can only 628 * be one if we have the only reference to the amap (via our 629 * locked map). if we are greater than one we fall through to 630 * the next case (where we double check the value). 631 */ 632 633 if (entry->aref.ar_amap->am_ref == 1) { 634 entry->etype &= ~UVM_ET_NEEDSCOPY; 635 UVMHIST_LOG(maphist, "<- done [ref cnt = 1, took it over]", 636 0, 0, 0, 0); 637 return; 638 } 639 640 /* 641 * looks like we need to copy the map. 642 */ 643 644 UVMHIST_LOG(maphist," amap=%p, ref=%d, must copy it", 645 entry->aref.ar_amap, entry->aref.ar_amap->am_ref, 0, 0); 646 AMAP_B2SLOT(slots, entry->end - entry->start); 647 amap = amap_alloc1(slots, 0, waitf); 648 if (amap == NULL) { 649 UVMHIST_LOG(maphist, " amap_alloc1 failed", 0,0,0,0); 650 return; 651 } 652 srcamap = entry->aref.ar_amap; 653 amap_lock(srcamap); 654 655 /* 656 * need to double check reference count now that we've got the 657 * src amap locked down. the reference count could have 658 * changed while we were in malloc. if the reference count 659 * dropped down to one we take over the old map rather than 660 * copying the amap. 661 */ 662 663 if (srcamap->am_ref == 1) { /* take it over? */ 664 entry->etype &= ~UVM_ET_NEEDSCOPY; 665 amap->am_ref--; /* drop final reference to map */ 666 amap_unlock(amap); 667 amap_free(amap); /* dispose of new (unused) amap */ 668 amap_unlock(srcamap); 669 return; 670 } 671 672 /* 673 * we must copy it now. 674 */ 675 676 UVMHIST_LOG(maphist, " copying amap now",0, 0, 0, 0); 677 for (lcv = 0 ; lcv < slots; lcv++) { 678 amap->am_anon[lcv] = 679 srcamap->am_anon[entry->aref.ar_pageoff + lcv]; 680 if (amap->am_anon[lcv] == NULL) 681 continue; 682 simple_lock(&amap->am_anon[lcv]->an_lock); 683 amap->am_anon[lcv]->an_ref++; 684 simple_unlock(&amap->am_anon[lcv]->an_lock); 685 amap->am_bckptr[lcv] = amap->am_nused; 686 amap->am_slots[amap->am_nused] = lcv; 687 amap->am_nused++; 688 } 689 memset(&amap->am_anon[lcv], 0, 690 (amap->am_maxslot - lcv) * sizeof(struct vm_anon *)); 691 692 /* 693 * drop our reference to the old amap (srcamap) and unlock. 694 * we know that the reference count on srcamap is greater than 695 * one (we checked above), so there is no way we could drop 696 * the count to zero. [and no need to worry about freeing it] 697 */ 698 699 srcamap->am_ref--; 700 if (srcamap->am_ref == 1 && (srcamap->am_flags & AMAP_SHARED) != 0) 701 srcamap->am_flags &= ~AMAP_SHARED; /* clear shared flag */ 702 #ifdef UVM_AMAP_PPREF 703 if (srcamap->am_ppref && srcamap->am_ppref != PPREF_NONE) { 704 amap_pp_adjref(srcamap, entry->aref.ar_pageoff, 705 (entry->end - entry->start) >> PAGE_SHIFT, -1); 706 } 707 #endif 708 709 amap_unlock(srcamap); 710 711 /* 712 * install new amap. 713 */ 714 715 entry->aref.ar_pageoff = 0; 716 entry->aref.ar_amap = amap; 717 entry->etype &= ~UVM_ET_NEEDSCOPY; 718 UVMHIST_LOG(maphist, "<- done",0, 0, 0, 0); 719 } 720 721 /* 722 * amap_cow_now: resolve all copy-on-write faults in an amap now for fork(2) 723 * 724 * called during fork(2) when the parent process has a wired map 725 * entry. in that case we want to avoid write-protecting pages 726 * in the parent's map (e.g. like what you'd do for a COW page) 727 * so we resolve the COW here. 728 * 729 * => assume parent's entry was wired, thus all pages are resident. 730 * => assume pages that are loaned out (loan_count) are already mapped 731 * read-only in all maps, and thus no need for us to worry about them 732 * => assume both parent and child vm_map's are locked 733 * => caller passes child's map/entry in to us 734 * => if we run out of memory we will unlock the amap and sleep _with_ the 735 * parent and child vm_map's locked(!). we have to do this since 736 * we are in the middle of a fork(2) and we can't let the parent 737 * map change until we are done copying all the map entrys. 738 * => XXXCDC: out of memory should cause fork to fail, but there is 739 * currently no easy way to do this (needs fix) 740 * => page queues must be unlocked (we may lock them) 741 */ 742 743 void 744 amap_cow_now(map, entry) 745 struct vm_map *map; 746 struct vm_map_entry *entry; 747 { 748 struct vm_amap *amap = entry->aref.ar_amap; 749 int lcv, slot; 750 struct vm_anon *anon, *nanon; 751 struct vm_page *pg, *npg; 752 753 /* 754 * note that if we unlock the amap then we must ReStart the "lcv" for 755 * loop because some other process could reorder the anon's in the 756 * am_anon[] array on us while the lock is dropped. 757 */ 758 759 ReStart: 760 amap_lock(amap); 761 762 for (lcv = 0 ; lcv < amap->am_nused ; lcv++) { 763 764 /* 765 * get the page 766 */ 767 768 slot = amap->am_slots[lcv]; 769 anon = amap->am_anon[slot]; 770 simple_lock(&anon->an_lock); 771 pg = anon->u.an_page; 772 773 /* 774 * page must be resident since parent is wired 775 */ 776 777 if (pg == NULL) 778 panic("amap_cow_now: non-resident wired page in anon %p", 779 anon); 780 781 /* 782 * if the anon ref count is one and the page is not loaned, 783 * then we are safe (the child has exclusive access to the 784 * page). if the page is loaned, then it must already be 785 * mapped read-only. 786 * 787 * we only need to get involved when these are not true. 788 * [note: if loan_count == 0, then the anon must own the page] 789 */ 790 791 if (anon->an_ref > 1 && pg->loan_count == 0) { 792 793 /* 794 * if the page is busy then we have to unlock, wait for 795 * it and then restart. 796 */ 797 if (pg->flags & PG_BUSY) { 798 pg->flags |= PG_WANTED; 799 amap_unlock(amap); 800 UVM_UNLOCK_AND_WAIT(pg, &anon->an_lock, FALSE, 801 "cownow", 0); 802 goto ReStart; 803 } 804 805 /* 806 * ok, time to do a copy-on-write to a new anon 807 */ 808 nanon = uvm_analloc(); 809 if (nanon) { 810 /* nanon is locked! */ 811 npg = uvm_pagealloc(NULL, 0, nanon, 0); 812 } else 813 npg = NULL; /* XXX: quiet gcc warning */ 814 815 if (nanon == NULL || npg == NULL) { 816 /* out of memory */ 817 /* 818 * XXXCDC: we should cause fork to fail, but 819 * we can't ... 820 */ 821 if (nanon) { 822 nanon->an_ref--; 823 simple_unlock(&nanon->an_lock); 824 uvm_anfree(nanon); 825 } 826 simple_unlock(&anon->an_lock); 827 amap_unlock(amap); 828 uvm_wait("cownowpage"); 829 goto ReStart; 830 } 831 832 /* 833 * got it... now we can copy the data and replace anon 834 * with our new one... 835 */ 836 837 uvm_pagecopy(pg, npg); /* old -> new */ 838 anon->an_ref--; /* can't drop to zero */ 839 amap->am_anon[slot] = nanon; /* replace */ 840 841 /* 842 * drop PG_BUSY on new page ... since we have had it's 843 * owner locked the whole time it can't be 844 * PG_RELEASED | PG_WANTED. 845 */ 846 847 npg->flags &= ~(PG_BUSY|PG_FAKE); 848 UVM_PAGE_OWN(npg, NULL); 849 uvm_lock_pageq(); 850 uvm_pageactivate(npg); 851 uvm_unlock_pageq(); 852 simple_unlock(&nanon->an_lock); 853 } 854 simple_unlock(&anon->an_lock); 855 } 856 amap_unlock(amap); 857 } 858 859 /* 860 * amap_splitref: split a single reference into two separate references 861 * 862 * => called from uvm_map's clip routines 863 * => origref's map should be locked 864 * => origref->ar_amap should be unlocked (we will lock) 865 */ 866 void 867 amap_splitref(origref, splitref, offset) 868 struct vm_aref *origref, *splitref; 869 vaddr_t offset; 870 { 871 int leftslots; 872 873 AMAP_B2SLOT(leftslots, offset); 874 if (leftslots == 0) 875 panic("amap_splitref: split at zero offset"); 876 877 amap_lock(origref->ar_amap); 878 879 /* 880 * now: amap is locked and we have a valid am_mapped array. 881 */ 882 883 if (origref->ar_amap->am_nslot - origref->ar_pageoff - leftslots <= 0) 884 panic("amap_splitref: map size check failed"); 885 886 #ifdef UVM_AMAP_PPREF 887 /* 888 * establish ppref before we add a duplicate reference to the amap 889 */ 890 if (origref->ar_amap->am_ppref == NULL) 891 amap_pp_establish(origref->ar_amap); 892 #endif 893 894 splitref->ar_amap = origref->ar_amap; 895 splitref->ar_amap->am_ref++; /* not a share reference */ 896 splitref->ar_pageoff = origref->ar_pageoff + leftslots; 897 898 amap_unlock(origref->ar_amap); 899 } 900 901 #ifdef UVM_AMAP_PPREF 902 903 /* 904 * amap_pp_establish: add a ppref array to an amap, if possible 905 * 906 * => amap locked by caller 907 */ 908 void 909 amap_pp_establish(amap) 910 struct vm_amap *amap; 911 { 912 amap->am_ppref = malloc(sizeof(int) * amap->am_maxslot, 913 M_UVMAMAP, M_NOWAIT); 914 915 /* 916 * if we fail then we just won't use ppref for this amap 917 */ 918 919 if (amap->am_ppref == NULL) { 920 amap->am_ppref = PPREF_NONE; /* not using it */ 921 return; 922 } 923 memset(amap->am_ppref, 0, sizeof(int) * amap->am_maxslot); 924 pp_setreflen(amap->am_ppref, 0, amap->am_ref, amap->am_nslot); 925 return; 926 } 927 928 /* 929 * amap_pp_adjref: adjust reference count to a part of an amap using the 930 * per-page reference count array. 931 * 932 * => map and amap locked by caller 933 * => caller must check that ppref != PPREF_NONE before calling 934 */ 935 void 936 amap_pp_adjref(amap, curslot, slotlen, adjval) 937 struct vm_amap *amap; 938 int curslot; 939 vsize_t slotlen; 940 int adjval; 941 { 942 int stopslot, *ppref, lcv; 943 int ref, len; 944 945 stopslot = curslot + slotlen; 946 ppref = amap->am_ppref; 947 948 /* 949 * first advance to the correct place in the ppref array, fragment 950 * if needed. 951 */ 952 953 for (lcv = 0 ; lcv < curslot ; lcv += len) { 954 pp_getreflen(ppref, lcv, &ref, &len); 955 if (lcv + len > curslot) { /* goes past start? */ 956 pp_setreflen(ppref, lcv, ref, curslot - lcv); 957 pp_setreflen(ppref, curslot, ref, len - (curslot -lcv)); 958 len = curslot - lcv; /* new length of entry @ lcv */ 959 } 960 } 961 962 /* 963 * now adjust reference counts in range (make sure we dont overshoot) 964 */ 965 966 if (lcv != curslot) 967 panic("amap_pp_adjref: overshot target"); 968 969 for (/* lcv already set */; lcv < stopslot ; lcv += len) { 970 pp_getreflen(ppref, lcv, &ref, &len); 971 if (lcv + len > stopslot) { /* goes past end? */ 972 pp_setreflen(ppref, lcv, ref, stopslot - lcv); 973 pp_setreflen(ppref, stopslot, ref, 974 len - (stopslot - lcv)); 975 len = stopslot - lcv; 976 } 977 ref = ref + adjval; /* ADJUST! */ 978 if (ref < 0) 979 panic("amap_pp_adjref: negative reference count"); 980 pp_setreflen(ppref, lcv, ref, len); 981 if (ref == 0) 982 amap_wiperange(amap, lcv, len); 983 } 984 985 } 986 987 /* 988 * amap_wiperange: wipe out a range of an amap 989 * [different from amap_wipeout because the amap is kept intact] 990 * 991 * => both map and amap must be locked by caller. 992 */ 993 void 994 amap_wiperange(amap, slotoff, slots) 995 struct vm_amap *amap; 996 int slotoff, slots; 997 { 998 int byanon, lcv, stop, curslot, ptr, slotend; 999 struct vm_anon *anon; 1000 1001 /* 1002 * we can either traverse the amap by am_anon or by am_slots depending 1003 * on which is cheaper. decide now. 1004 */ 1005 1006 if (slots < amap->am_nused) { 1007 byanon = TRUE; 1008 lcv = slotoff; 1009 stop = slotoff + slots; 1010 } else { 1011 byanon = FALSE; 1012 lcv = 0; 1013 stop = amap->am_nused; 1014 slotend = slotoff + slots; 1015 } 1016 1017 while (lcv < stop) { 1018 int refs; 1019 1020 if (byanon) { 1021 curslot = lcv++; /* lcv advances here */ 1022 if (amap->am_anon[curslot] == NULL) 1023 continue; 1024 } else { 1025 curslot = amap->am_slots[lcv]; 1026 if (curslot < slotoff || curslot >= slotend) { 1027 lcv++; /* lcv advances here */ 1028 continue; 1029 } 1030 stop--; /* drop stop, since anon will be removed */ 1031 } 1032 anon = amap->am_anon[curslot]; 1033 1034 /* 1035 * remove it from the amap 1036 */ 1037 1038 amap->am_anon[curslot] = NULL; 1039 ptr = amap->am_bckptr[curslot]; 1040 if (ptr != (amap->am_nused - 1)) { 1041 amap->am_slots[ptr] = 1042 amap->am_slots[amap->am_nused - 1]; 1043 amap->am_bckptr[amap->am_slots[ptr]] = 1044 ptr; /* back ptr. */ 1045 } 1046 amap->am_nused--; 1047 1048 /* 1049 * drop anon reference count 1050 */ 1051 1052 simple_lock(&anon->an_lock); 1053 refs = --anon->an_ref; 1054 simple_unlock(&anon->an_lock); 1055 if (refs == 0) { 1056 1057 /* 1058 * we just eliminated the last reference to an anon. 1059 * free it. 1060 */ 1061 1062 uvm_anfree(anon); 1063 } 1064 } 1065 } 1066 1067 #endif 1068