/*	$OpenBSD: uvm_amap.c,v 1.44 2009/03/25 20:00:18 oga Exp $	*/
/*	$NetBSD: uvm_amap.c,v 1.27 2000/11/25 06:27:59 chs Exp $	*/

/*
 *
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by Charles D. Cranor and
 *      Washington University.
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * uvm_amap.c: amap operations
 */

/*
 * this file contains functions that perform operations on amaps.  see
 * uvm_amap.h for a brief explanation of the role of amaps in uvm.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/pool.h>

#include <uvm/uvm.h>
#include <uvm/uvm_swap.h>

/*
 * pool for allocation of vm_amap structures.  note that the pool has
 * its own simplelock for its protection.  also note that in order to
 * avoid an endless loop, the amap pool's allocator cannot allocate
 * memory from an amap (it currently goes through the kernel uobj, so
 * we are ok).
 */

struct pool uvm_amap_pool;

LIST_HEAD(, vm_amap) amap_list;

#define MALLOC_SLOT_UNIT (2 * sizeof(int) + sizeof(struct vm_anon *))

/*
 * local functions
 */

static struct vm_amap *amap_alloc1(int, int, int);
static __inline void amap_list_insert(struct vm_amap *);
static __inline void amap_list_remove(struct vm_amap *);

static __inline void
amap_list_insert(struct vm_amap *amap)
{
	LIST_INSERT_HEAD(&amap_list, amap, am_list);
}

static __inline void
amap_list_remove(struct vm_amap *amap)
{
	LIST_REMOVE(amap, am_list);
}
#ifdef UVM_AMAP_PPREF

/*
 * what is ppref?   ppref is an _optional_ amap feature which is used
 * to keep track of reference counts on a per-page basis.  it is enabled
 * when UVM_AMAP_PPREF is defined.
 *
 * when enabled, an array of ints is allocated for the pprefs.  this
 * array is allocated only when a partial reference is added to the
 * map (either by unmapping part of the amap, or gaining a reference
 * to only a part of an amap).  if the malloc of the array fails
 * (M_NOWAIT), then we set the array pointer to PPREF_NONE to indicate
 * that we tried to do ppref's but couldn't alloc the array so just
 * give up (after all, this is an optional feature!).
 *
 * the array is divided into page sized "chunks."   for chunks of length 1,
 * the chunk reference count plus one is stored in that chunk's slot.
 * for chunks of length > 1 the first slot contains (the reference count
 * plus one) * -1.    [the negative value indicates that the length is
 * greater than one.]   the second slot of the chunk contains the length
 * of the chunk.   here is an example:
 *
 * actual REFS:  2  2  2  2  3  1  1  0  0  0  4  4  0  1  1  1
 *       ppref: -3  4  x  x  4 -2  2 -1  3  x -5  2  1 -2  3  x
 *              <----------><-><----><-------><----><-><------->
 * (x = don't care)
 *
 * this allows us to allow one int to contain the ref count for the whole
 * chunk.    note that the "plus one" part is needed because a reference
 * count of zero is neither positive nor negative (we need a way to tell
 * if we've got one zero or a bunch of them).
 *
 * here are some in-line functions to help us.
 */

static __inline void pp_getreflen(int *, int, int *, int *);
static __inline void pp_setreflen(int *, int, int, int);

/*
 * pp_getreflen: get the reference and length for a specific offset
 *
 * => ppref's amap must be locked
 */
static __inline void
pp_getreflen(int *ppref, int offset, int *refp, int *lenp)
{

	if (ppref[offset] > 0) {		/* chunk size must be 1 */
		*refp = ppref[offset] - 1;	/* don't forget to adjust */
		*lenp = 1;
	} else {
		*refp = (ppref[offset] * -1) - 1;
		*lenp = ppref[offset+1];
	}
}

/*
 * pp_setreflen: set the reference and length for a specific offset
 *
 * => ppref's amap must be locked
 */
static __inline void
pp_setreflen(int *ppref, int offset, int ref, int len)
{
	if (len == 1) {
		ppref[offset] = ref + 1;
	} else {
		ppref[offset] = (ref + 1) * -1;
		ppref[offset+1] = len;
	}
}
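
/*
 * illustrative example (added commentary, not in the original source):
 * encoding the first two chunks of the table above with pp_setreflen()
 * and reading them back with pp_getreflen():
 *
 *	pp_setreflen(ppref, 0, 2, 4);	=> ppref[0] = -3, ppref[1] = 4
 *	pp_setreflen(ppref, 4, 3, 1);	=> ppref[4] = 4
 *
 *	pp_getreflen(ppref, 0, &ref, &len);	=> ref = 2, len = 4
 *	pp_getreflen(ppref, 4, &ref, &len);	=> ref = 3, len = 1
 */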
#endif

/*
 * amap_init: called at boot time to init global amap data structures
 */

void
amap_init(void)
{
	/*
	 * Initialize the vm_amap pool.
	 */
	pool_init(&uvm_amap_pool, sizeof(struct vm_amap), 0, 0, 0,
	    "amappl", &pool_allocator_nointr);
	pool_sethiwat(&uvm_amap_pool, 4096);
}

/*
 * amap_alloc1: internal function that allocates an amap, but does not
 *	init the overlay.
 *
 * => lock on returned amap is init'd
 */
static inline struct vm_amap *
amap_alloc1(int slots, int padslots, int waitf)
{
	struct vm_amap *amap;
	int totalslots;

	amap = pool_get(&uvm_amap_pool, (waitf == M_WAITOK) ? PR_WAITOK : 0);
	if (amap == NULL)
		return(NULL);

	totalslots = malloc_roundup((slots + padslots) * MALLOC_SLOT_UNIT) /
	    MALLOC_SLOT_UNIT;
	amap->am_ref = 1;
	amap->am_flags = 0;
#ifdef UVM_AMAP_PPREF
	amap->am_ppref = NULL;
#endif
	amap->am_maxslot = totalslots;
	amap->am_nslot = slots;
	amap->am_nused = 0;

	amap->am_slots = malloc(totalslots * MALLOC_SLOT_UNIT, M_UVMAMAP,
	    waitf);
	if (amap->am_slots == NULL)
		goto fail1;

	amap->am_bckptr = (int *)(((char *)amap->am_slots) + totalslots *
	    sizeof(int));
	amap->am_anon = (struct vm_anon **)(((char *)amap->am_bckptr) +
	    totalslots * sizeof(int));

	return(amap);

fail1:
	pool_put(&uvm_amap_pool, amap);
	return (NULL);
}
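
/*
 * note on the layout above (added commentary, not in the original
 * source): amap_alloc1() carves all three per-slot arrays out of the
 * single malloc of totalslots * MALLOC_SLOT_UNIT bytes, which is why
 * MALLOC_SLOT_UNIT is two ints plus one pointer:
 *
 *	am_slots:	totalslots * sizeof(int)	(base of allocation)
 *	am_bckptr:	totalslots * sizeof(int)
 *	am_anon:	totalslots * sizeof(struct vm_anon *)
 *
 * this is also why amap_free() below only frees am_slots: that single
 * free releases all three arrays at once.
 */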

/*
 * amap_alloc: allocate an amap to manage "sz" bytes of anonymous VM
 *
 * => caller should ensure sz is a multiple of PAGE_SIZE
 * => reference count to new amap is set to one
 * => new amap is returned unlocked
 */

struct vm_amap *
amap_alloc(vaddr_t sz, vaddr_t padsz, int waitf)
{
	struct vm_amap *amap;
	int slots, padslots;
	UVMHIST_FUNC("amap_alloc"); UVMHIST_CALLED(maphist);

	AMAP_B2SLOT(slots, sz);		/* load slots */
	AMAP_B2SLOT(padslots, padsz);

	amap = amap_alloc1(slots, padslots, waitf);
	if (amap) {
		memset(amap->am_anon, 0,
		    amap->am_maxslot * sizeof(struct vm_anon *));
		amap_list_insert(amap);
	}

	UVMHIST_LOG(maphist,"<- done, amap = %p, sz=%lu", amap, sz, 0, 0);
	return(amap);
}


/*
 * amap_free: free an amap
 *
 * => the amap must be locked (mainly for simplelock accounting)
 * => the amap should have a zero reference count and be empty
 */
void
amap_free(struct vm_amap *amap)
{
	UVMHIST_FUNC("amap_free"); UVMHIST_CALLED(maphist);

	KASSERT(amap->am_ref == 0 && amap->am_nused == 0);
	KASSERT((amap->am_flags & AMAP_SWAPOFF) == 0);

	free(amap->am_slots, M_UVMAMAP);
#ifdef UVM_AMAP_PPREF
	if (amap->am_ppref && amap->am_ppref != PPREF_NONE)
		free(amap->am_ppref, M_UVMAMAP);
#endif
	pool_put(&uvm_amap_pool, amap);

	UVMHIST_LOG(maphist,"<- done, freed amap = %p", amap, 0, 0, 0);
}
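
/*
 * illustrative call sequence (a sketch added for clarity, not from the
 * original source): a caller managing one page of anonymous memory,
 * with a hypothetical aref of { ar_amap = amap, ar_pageoff = 0 } and a
 * previously allocated anon, might go through:
 *
 *	amap = amap_alloc(PAGE_SIZE, 0, M_WAITOK);
 *	amap_add(&aref, 0, anon, FALSE);	-- install the anon
 *	an = amap_lookup(&aref, 0);		-- fault time: an == anon
 *	amap_unref(amap, 0, 1, TRUE);		-- final ref: wipeout
 */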

/*
 * amap_extend: extend the size of an amap (if needed)
 *
 * => called from uvm_map when we want to extend an amap to cover
 *    a new mapping (rather than allocate a new one)
 * => amap should be unlocked (we will lock it)
 * => to safely extend an amap it should have a reference count of
 *    one (thus it can't be shared)
 * => XXXCDC: support padding at this level?
 */
int
amap_extend(struct vm_map_entry *entry, vsize_t addsize)
{
	struct vm_amap *amap = entry->aref.ar_amap;
	int slotoff = entry->aref.ar_pageoff;
	int slotmapped, slotadd, slotneed, slotalloc;
#ifdef UVM_AMAP_PPREF
	int *newppref, *oldppref;
#endif
	u_int *newsl, *newbck, *oldsl, *oldbck;
	struct vm_anon **newover, **oldover;
	int slotadded;
	UVMHIST_FUNC("amap_extend"); UVMHIST_CALLED(maphist);

	UVMHIST_LOG(maphist, "  (entry=%p, addsize=%lu)", entry, addsize, 0, 0);

	/*
	 * first, determine how many slots we need in the amap.  don't
	 * forget that ar_pageoff could be non-zero: this means that
	 * there are some unused slots before us in the amap.
	 */

	AMAP_B2SLOT(slotmapped, entry->end - entry->start); /* slots mapped */
	AMAP_B2SLOT(slotadd, addsize);			/* slots to add */
	slotneed = slotoff + slotmapped + slotadd;

	/*
	 * case 1: we already have enough slots in the map and thus
	 * only need to bump the reference counts on the slots we are
	 * adding.
	 */

	if (amap->am_nslot >= slotneed) {
#ifdef UVM_AMAP_PPREF
		if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
			amap_pp_adjref(amap, slotoff + slotmapped, slotadd, 1);
		}
#endif
		UVMHIST_LOG(maphist,"<- done (case 1), amap = %p, slotneed=%ld",
		    amap, slotneed, 0, 0);
		return (0);
	}

	/*
	 * case 2: we pre-allocated slots for use and we just need to
	 * bump nslot up to take account of these slots.
	 */

	if (amap->am_maxslot >= slotneed) {
#ifdef UVM_AMAP_PPREF
		if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
			if ((slotoff + slotmapped) < amap->am_nslot)
				amap_pp_adjref(amap, slotoff + slotmapped,
				    (amap->am_nslot - (slotoff + slotmapped)),
				    1);
			pp_setreflen(amap->am_ppref, amap->am_nslot, 1,
			    slotneed - amap->am_nslot);
		}
#endif
		amap->am_nslot = slotneed;

		/*
		 * no need to zero am_anon since that was done at
		 * alloc time and we never shrink an allocation.
		 */
		UVMHIST_LOG(maphist,"<- done (case 2), amap = %p, slotneed=%ld",
		    amap, slotneed, 0, 0);
		return (0);
	}

	/*
	 * case 3: we need to malloc a new amap and copy all the amap
	 * data over from old amap to the new one.
	 *
	 * XXXCDC: could we take advantage of a kernel realloc()?
	 */

	if (slotneed >= UVM_AMAP_LARGE)
		return E2BIG;

	slotalloc = malloc_roundup(slotneed * MALLOC_SLOT_UNIT) /
	    MALLOC_SLOT_UNIT;
#ifdef UVM_AMAP_PPREF
	newppref = NULL;
	if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
		newppref = malloc(slotalloc * sizeof(int), M_UVMAMAP,
		    M_WAITOK | M_CANFAIL);
		if (newppref == NULL) {
			/* give up if malloc fails */
			free(amap->am_ppref, M_UVMAMAP);
			amap->am_ppref = PPREF_NONE;
		}
	}
#endif
	newsl = malloc(slotalloc * MALLOC_SLOT_UNIT, M_UVMAMAP,
	    M_WAITOK | M_CANFAIL);
	if (newsl == NULL) {
#ifdef UVM_AMAP_PPREF
		if (newppref != NULL) {
			free(newppref, M_UVMAMAP);
		}
#endif
		return (ENOMEM);
	}
	newbck = (int *)(((char *)newsl) + slotalloc * sizeof(int));
	newover = (struct vm_anon **)(((char *)newbck) + slotalloc *
	    sizeof(int));
	KASSERT(amap->am_maxslot < slotneed);

	/*
	 * now copy everything over to new malloc'd areas...
	 */

	slotadded = slotalloc - amap->am_nslot;

	/* do am_slots */
	oldsl = amap->am_slots;
	memcpy(newsl, oldsl, sizeof(int) * amap->am_nused);
	amap->am_slots = newsl;

	/* do am_anon */
	oldover = amap->am_anon;
	memcpy(newover, oldover, sizeof(struct vm_anon *) * amap->am_nslot);
	memset(newover + amap->am_nslot, 0, sizeof(struct vm_anon *) *
	    slotadded);
	amap->am_anon = newover;

	/* do am_bckptr */
	oldbck = amap->am_bckptr;
	memcpy(newbck, oldbck, sizeof(int) * amap->am_nslot);
	memset(newbck + amap->am_nslot, 0, sizeof(int) * slotadded); /* XXX: needed? */
	amap->am_bckptr = newbck;

#ifdef UVM_AMAP_PPREF
	/* do ppref */
	oldppref = amap->am_ppref;
	if (newppref) {
		memcpy(newppref, oldppref, sizeof(int) * amap->am_nslot);
		memset(newppref + amap->am_nslot, 0, sizeof(int) * slotadded);
		amap->am_ppref = newppref;
		if ((slotoff + slotmapped) < amap->am_nslot)
			amap_pp_adjref(amap, slotoff + slotmapped,
			    (amap->am_nslot - (slotoff + slotmapped)), 1);
		pp_setreflen(newppref, amap->am_nslot, 1,
		    slotneed - amap->am_nslot);
	}
#endif

	/* update master values */
	amap->am_nslot = slotneed;
	amap->am_maxslot = slotalloc;

	/* and free */
	free(oldsl, M_UVMAMAP);
#ifdef UVM_AMAP_PPREF
	if (oldppref && oldppref != PPREF_NONE)
		free(oldppref, M_UVMAMAP);
#endif
	UVMHIST_LOG(maphist,"<- done (case 3), amap = %p, slotneed=%ld",
	    amap, slotneed, 0, 0);
	return (0);
}
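
/*
 * worked example of the slot arithmetic above (added commentary, not in
 * the original source): with ar_pageoff = 2, a currently mapped region
 * of 4 slots and an addsize of 2 pages, slotneed = 2 + 4 + 2 = 8.  an
 * amap with am_nslot >= 8 is case 1, am_nslot < 8 <= am_maxslot is
 * case 2 (pre-allocated padding), and am_maxslot < 8 is case 3 (new
 * allocation plus copy).
 */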

/*
 * amap_share_protect: change protection of anons in a shared amap
 *
 * for shared amaps, given the current data structure layout, it is
 * not possible for us to directly locate all maps referencing the
 * shared anon (to change the protection).  in order to protect data
 * in shared maps we use pmap_page_protect().  [this is useful for IPC
 * mechanisms like map entry passing that may want to write-protect
 * all mappings of a shared amap.]  we traverse am_anon or am_slots
 * depending on the current state of the amap.
 *
 * => entry's map and amap must be locked by the caller
 */
void
amap_share_protect(struct vm_map_entry *entry, vm_prot_t prot)
{
	struct vm_amap *amap = entry->aref.ar_amap;
	int slots, lcv, slot, stop;

	AMAP_B2SLOT(slots, (entry->end - entry->start));
	stop = entry->aref.ar_pageoff + slots;

	if (slots < amap->am_nused) {
		/* cheaper to traverse am_anon */
		for (lcv = entry->aref.ar_pageoff ; lcv < stop ; lcv++) {
			if (amap->am_anon[lcv] == NULL)
				continue;
			if (amap->am_anon[lcv]->an_page != NULL)
				pmap_page_protect(amap->am_anon[lcv]->an_page,
				    prot);
		}
		return;
	}

	/* cheaper to traverse am_slots */
	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
		slot = amap->am_slots[lcv];
		if (slot < entry->aref.ar_pageoff || slot >= stop)
			continue;
		if (amap->am_anon[slot]->an_page != NULL)
			pmap_page_protect(amap->am_anon[slot]->an_page, prot);
	}
	return;
}

/*
 * amap_wipeout: wipeout all anon's in an amap; then free the amap!
 *
 * => called from amap_unref when the final reference to an amap is
 *	discarded (i.e. when the reference count drops to zero)
 * => the amap should be locked (by the caller)
 */

void
amap_wipeout(struct vm_amap *amap)
{
	int lcv, slot;
	struct vm_anon *anon;
	UVMHIST_FUNC("amap_wipeout"); UVMHIST_CALLED(maphist);
	UVMHIST_LOG(maphist,"(amap=%p)", amap, 0,0,0);

	KASSERT(amap->am_ref == 0);

	if (__predict_false((amap->am_flags & AMAP_SWAPOFF) != 0)) {
		/*
		 * amap_swap_off will call us again.
		 */
		return;
	}
	amap_list_remove(amap);

	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
		int refs;

		slot = amap->am_slots[lcv];
		anon = amap->am_anon[slot];

		if (anon == NULL || anon->an_ref == 0)
			panic("amap_wipeout: corrupt amap");

		simple_lock(&anon->an_lock);	/* lock anon */

		UVMHIST_LOG(maphist,"  processing anon %p, ref=%ld", anon,
		    anon->an_ref, 0, 0);

		refs = --anon->an_ref;
		simple_unlock(&anon->an_lock);
		if (refs == 0) {
			/*
			 * we had the last reference to a vm_anon.  free it.
			 */
			uvm_anfree(anon);
		}
	}

	/*
	 * now we free the map
	 */

	amap->am_ref = 0;	/* ... was one */
	amap->am_nused = 0;
	amap_free(amap);	/* will unlock and free amap */
	UVMHIST_LOG(maphist,"<- done!", 0,0,0,0);
}

/*
 * amap_copy: ensure that a map entry's "needs_copy" flag is false
 *	by copying the amap if necessary.
 *
 * => an entry with a null amap pointer will get a new (blank) one.
 * => the map that the map entry belongs to must be locked by caller.
 * => the amap currently attached to "entry" (if any) must be unlocked.
 * => if canchunk is true, then we may clip the entry into a chunk
 * => "startva" and "endva" are used only if canchunk is true.  they are
 *     used to limit chunking (e.g. if you have a large space that you
 *     know you are going to need to allocate amaps for, there is no point
 *     in allowing that to be chunked)
 */

void
amap_copy(struct vm_map *map, struct vm_map_entry *entry, int waitf,
    boolean_t canchunk, vaddr_t startva, vaddr_t endva)
{
	struct vm_amap *amap, *srcamap;
	int slots, lcv;
	vaddr_t chunksize;
	UVMHIST_FUNC("amap_copy"); UVMHIST_CALLED(maphist);
	UVMHIST_LOG(maphist, "  (map=%p, entry=%p, waitf=%ld)",
	    map, entry, waitf, 0);

	/*
	 * is there a map to copy?   if not, create one from scratch.
	 */

	if (entry->aref.ar_amap == NULL) {

		/*
		 * check to see if we have a large amap that we can
		 * chunk.  we align startva/endva to chunk-sized
		 * boundaries and then clip to them.
		 */

		if (canchunk && atop(entry->end - entry->start) >=
		    UVM_AMAP_LARGE) {
			/* convert slots to bytes */
			chunksize = UVM_AMAP_CHUNK << PAGE_SHIFT;
			startva = (startva / chunksize) * chunksize;
			endva = roundup(endva, chunksize);
			UVMHIST_LOG(maphist, "  chunk amap ==> clip "
			    "0x%lx->0x%lx to 0x%lx->0x%lx",
			    entry->start, entry->end, startva, endva);
			UVM_MAP_CLIP_START(map, entry, startva);
			/* watch out for endva wrap-around! */
			if (endva >= startva)
				UVM_MAP_CLIP_END(map, entry, endva);
		}

		UVMHIST_LOG(maphist, "<- done [creating new amap 0x%lx->0x%lx]",
		    entry->start, entry->end, 0, 0);
		entry->aref.ar_pageoff = 0;
		entry->aref.ar_amap = amap_alloc(entry->end - entry->start, 0,
		    waitf);
		if (entry->aref.ar_amap != NULL)
			entry->etype &= ~UVM_ET_NEEDSCOPY;
		return;
	}

	/*
	 * first check and see if we are the only map entry
	 * referencing the amap we currently have.  if so, then we can
	 * just take it over rather than copying it.  note that we are
	 * reading am_ref with the amap unlocked... the value can only
	 * be one if we have the only reference to the amap (via our
	 * locked map).  if we are greater than one we fall through to
	 * the next case (where we double check the value).
	 */

	if (entry->aref.ar_amap->am_ref == 1) {
		entry->etype &= ~UVM_ET_NEEDSCOPY;
		UVMHIST_LOG(maphist, "<- done [ref cnt = 1, took it over]",
		    0, 0, 0, 0);
		return;
	}

	/*
	 * looks like we need to copy the map.
	 */

	UVMHIST_LOG(maphist,"  amap=%p, ref=%ld, must copy it",
	    entry->aref.ar_amap, entry->aref.ar_amap->am_ref, 0, 0);
	AMAP_B2SLOT(slots, entry->end - entry->start);
	amap = amap_alloc1(slots, 0, waitf);
	if (amap == NULL) {
		UVMHIST_LOG(maphist, "  amap_alloc1 failed", 0,0,0,0);
		return;
	}
	srcamap = entry->aref.ar_amap;

	/*
	 * need to double check reference count now that we've got the
	 * src amap locked down.  the reference count could have
	 * changed while we were in malloc.  if the reference count
	 * dropped down to one we take over the old map rather than
	 * copying the amap.
	 */

	if (srcamap->am_ref == 1) {		/* take it over? */
		entry->etype &= ~UVM_ET_NEEDSCOPY;
		amap->am_ref--;		/* drop final reference to map */
		amap_free(amap);	/* dispose of new (unused) amap */
		return;
	}

	/*
	 * we must copy it now.
	 */

	UVMHIST_LOG(maphist, "  copying amap now",0, 0, 0, 0);
	for (lcv = 0 ; lcv < slots; lcv++) {
		amap->am_anon[lcv] =
		    srcamap->am_anon[entry->aref.ar_pageoff + lcv];
		if (amap->am_anon[lcv] == NULL)
			continue;
		simple_lock(&amap->am_anon[lcv]->an_lock);
		amap->am_anon[lcv]->an_ref++;
		simple_unlock(&amap->am_anon[lcv]->an_lock);
		amap->am_bckptr[lcv] = amap->am_nused;
		amap->am_slots[amap->am_nused] = lcv;
		amap->am_nused++;
	}
	memset(&amap->am_anon[lcv], 0,
	    (amap->am_maxslot - lcv) * sizeof(struct vm_anon *));

	/*
	 * drop our reference to the old amap (srcamap) and unlock.
	 * we know that the reference count on srcamap is greater than
	 * one (we checked above), so there is no way we could drop
	 * the count to zero.  [and no need to worry about freeing it]
	 */

	srcamap->am_ref--;
	if (srcamap->am_ref == 1 && (srcamap->am_flags & AMAP_SHARED) != 0)
		srcamap->am_flags &= ~AMAP_SHARED;   /* clear shared flag */
#ifdef UVM_AMAP_PPREF
	if (srcamap->am_ppref && srcamap->am_ppref != PPREF_NONE) {
		amap_pp_adjref(srcamap, entry->aref.ar_pageoff,
		    (entry->end - entry->start) >> PAGE_SHIFT, -1);
	}
#endif

	/*
	 * install new amap.
	 */

	entry->aref.ar_pageoff = 0;
	entry->aref.ar_amap = amap;
	entry->etype &= ~UVM_ET_NEEDSCOPY;

	amap_list_insert(amap);

	/*
	 * done!
	 */
	UVMHIST_LOG(maphist, "<- done",0, 0, 0, 0);
}
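
/*
 * worked example of the chunk clipping above (added commentary, not in
 * the original source): with chunksize = UVM_AMAP_CHUNK << PAGE_SHIFT,
 * if that works out to 64KB (0x10000), a startva of 0x1a000 is rounded
 * down to 0x10000 and an endva of 0x23000 is rounded up to 0x30000, so
 * the entry is clipped to chunk-aligned boundaries before the new amap
 * is allocated.
 */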

/*
 * amap_cow_now: resolve all copy-on-write faults in an amap now for fork(2)
 *
 *	called during fork(2) when the parent process has a wired map
 *	entry.   in that case we want to avoid write-protecting pages
 *	in the parent's map (e.g. like what you'd do for a COW page)
 *	so we resolve the COW here.
 *
 * => assume parent's entry was wired, thus all pages are resident.
 * => assume pages that are loaned out (loan_count) are already mapped
 *	read-only in all maps, and thus no need for us to worry about them
 * => assume both parent and child vm_map's are locked
 * => caller passes child's map/entry in to us
 * => if we run out of memory we will unlock the amap and sleep _with_ the
 *	parent and child vm_map's locked(!).  we have to do this since
 *	we are in the middle of a fork(2) and we can't let the parent
 *	map change until we are done copying all the map entries.
 * => XXXCDC: out of memory should cause fork to fail, but there is
 *	currently no easy way to do this (needs fix)
 * => page queues must be unlocked (we may lock them)
 */

void
amap_cow_now(struct vm_map *map, struct vm_map_entry *entry)
{
	struct vm_amap *amap = entry->aref.ar_amap;
	int lcv, slot;
	struct vm_anon *anon, *nanon;
	struct vm_page *pg, *npg;

	/*
	 * note that if we unlock the amap then we must ReStart the "lcv" for
	 * loop because some other process could reorder the anon's in the
	 * am_anon[] array on us while the lock is dropped.
	 */
ReStart:
	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {

		/*
		 * get the page
		 */

		slot = amap->am_slots[lcv];
		anon = amap->am_anon[slot];
		simple_lock(&anon->an_lock);
		pg = anon->an_page;

		/*
		 * page must be resident since parent is wired
		 */

		if (pg == NULL)
			panic("amap_cow_now: non-resident wired page in anon %p",
			    anon);

		/*
		 * if the anon ref count is one and the page is not loaned,
		 * then we are safe (the child has exclusive access to the
		 * page).  if the page is loaned, then it must already be
		 * mapped read-only.
		 *
		 * we only need to get involved when these are not true.
		 * [note: if loan_count == 0, then the anon must own the page]
		 */

		if (anon->an_ref > 1 && pg->loan_count == 0) {

			/*
			 * if the page is busy then we have to unlock, wait for
			 * it and then restart.
			 */
			if (pg->pg_flags & PG_BUSY) {
				atomic_setbits_int(&pg->pg_flags, PG_WANTED);
				UVM_UNLOCK_AND_WAIT(pg, &anon->an_lock, FALSE,
				    "cownow", 0);
				goto ReStart;
			}

			/*
			 * ok, time to do a copy-on-write to a new anon
			 */
			nanon = uvm_analloc();
			if (nanon) {
				npg = uvm_pagealloc(NULL, 0, nanon, 0);
			} else
				npg = NULL;	/* XXX: quiet gcc warning */

			if (nanon == NULL || npg == NULL) {
				/* out of memory */
				/*
				 * XXXCDC: we should cause fork to fail, but
				 * we can't ...
				 */
				if (nanon) {
					simple_lock(&nanon->an_lock);
					uvm_anfree(nanon);
				}
				simple_unlock(&anon->an_lock);
				uvm_wait("cownowpage");
				goto ReStart;
			}

			/*
			 * got it... now we can copy the data and replace anon
			 * with our new one...
			 */
			uvm_pagecopy(pg, npg);		/* old -> new */
			anon->an_ref--;			/* can't drop to zero */
			amap->am_anon[slot] = nanon;	/* replace */

			/*
			 * drop PG_BUSY on new page ... since we have had its
			 * owner locked the whole time it can't be
			 * PG_RELEASED | PG_WANTED.
			 */
			atomic_clearbits_int(&npg->pg_flags, PG_BUSY|PG_FAKE);
			UVM_PAGE_OWN(npg, NULL);
			uvm_lock_pageq();
			uvm_pageactivate(npg);
			uvm_unlock_pageq();
		}

		simple_unlock(&anon->an_lock);
		/*
		 * done with this anon, next ...!
		 */

	}	/* end of 'for' loop */
}

/*
 * amap_splitref: split a single reference into two separate references
 *
 * => called from uvm_map's clip routines
 * => origref's map should be locked
 * => origref->ar_amap should be unlocked (we will lock)
 */
void
amap_splitref(struct vm_aref *origref, struct vm_aref *splitref, vaddr_t offset)
{
	int leftslots;

	AMAP_B2SLOT(leftslots, offset);
	if (leftslots == 0)
		panic("amap_splitref: split at zero offset");

	/*
	 * now: amap is locked and we have a valid am_anon array.
	 */

	if (origref->ar_amap->am_nslot - origref->ar_pageoff - leftslots <= 0)
		panic("amap_splitref: map size check failed");

#ifdef UVM_AMAP_PPREF
	/*
	 * establish ppref before we add a duplicate reference to the amap
	 */
	if (origref->ar_amap->am_ppref == NULL)
		amap_pp_establish(origref->ar_amap);
#endif

	splitref->ar_amap = origref->ar_amap;
	splitref->ar_amap->am_ref++;		/* not a share reference */
	splitref->ar_pageoff = origref->ar_pageoff + leftslots;
}

#ifdef UVM_AMAP_PPREF

/*
 * amap_pp_establish: add a ppref array to an amap, if possible
 *
 * => amap locked by caller
 */
void
amap_pp_establish(struct vm_amap *amap)
{

	amap->am_ppref = malloc(sizeof(int) * amap->am_maxslot,
	    M_UVMAMAP, M_NOWAIT|M_ZERO);

	/*
	 * if we fail then we just won't use ppref for this amap
	 */
	if (amap->am_ppref == NULL) {
		amap->am_ppref = PPREF_NONE;	/* not using it */
		return;
	}

	/*
	 * init ppref
	 */
	pp_setreflen(amap->am_ppref, 0, amap->am_ref, amap->am_nslot);
}

/*
 * amap_pp_adjref: adjust reference count to a part of an amap using the
 * per-page reference count array.
 *
 * => map and amap locked by caller
 * => caller must check that ppref != PPREF_NONE before calling
 */
void
amap_pp_adjref(struct vm_amap *amap, int curslot, vsize_t slotlen, int adjval)
{
	int stopslot, *ppref, lcv, prevlcv;
	int ref, len, prevref, prevlen;

	stopslot = curslot + slotlen;
	ppref = amap->am_ppref;
	prevlcv = 0;

	/*
	 * first advance to the correct place in the ppref array,
	 * fragment if needed.
	 */

	for (lcv = 0 ; lcv < curslot ; lcv += len) {
		pp_getreflen(ppref, lcv, &ref, &len);
		if (lcv + len > curslot) {     /* goes past start? */
			pp_setreflen(ppref, lcv, ref, curslot - lcv);
			pp_setreflen(ppref, curslot, ref, len - (curslot - lcv));
			len = curslot - lcv;   /* new length of entry @ lcv */
		}
		prevlcv = lcv;
	}
	if (lcv != 0)
		pp_getreflen(ppref, prevlcv, &prevref, &prevlen);
	else {
		/*
		 * Ensure that the "prevref == ref" test below always
		 * fails, since we're starting from the beginning of
		 * the ppref array; that is, there is no previous
		 * chunk.
		 */
		prevref = -1;
		prevlen = 0;
	}

	/*
	 * now adjust reference counts in range.  merge the first
	 * changed entry with the last unchanged entry if possible.
	 */

	if (lcv != curslot)
		panic("amap_pp_adjref: overshot target");

	for (/* lcv already set */; lcv < stopslot ; lcv += len) {
		pp_getreflen(ppref, lcv, &ref, &len);
		if (lcv + len > stopslot) {     /* goes past end? */
			pp_setreflen(ppref, lcv, ref, stopslot - lcv);
			pp_setreflen(ppref, stopslot, ref,
			    len - (stopslot - lcv));
			len = stopslot - lcv;
		}
		ref += adjval;
		if (ref < 0)
			panic("amap_pp_adjref: negative reference count");
		if (lcv == prevlcv + prevlen && ref == prevref) {
			pp_setreflen(ppref, prevlcv, ref, prevlen + len);
		} else {
			pp_setreflen(ppref, lcv, ref, len);
		}
		if (ref == 0)
			amap_wiperange(amap, lcv, len);
	}

}
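
/*
 * illustrative example (added commentary, not in the original source):
 * if ppref holds a single chunk of 8 slots at ref 1 and we call
 * amap_pp_adjref(amap, 2, 4, 1), the first loop splits the chunk into
 * [0..1] and [2..7], and the second loop splits [2..7] into [2..5] and
 * [6..7], bumping [2..5] to ref 2.  the array then describes:
 * ref 1 len 2, ref 2 len 4, ref 1 len 2.
 */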

/*
 * amap_wiperange: wipe out a range of an amap
 * [different from amap_wipeout because the amap is kept intact]
 *
 * => both map and amap must be locked by caller.
 */
void
amap_wiperange(struct vm_amap *amap, int slotoff, int slots)
{
	int byanon, lcv, stop, curslot, ptr, slotend;
	struct vm_anon *anon;

	/*
	 * we can either traverse the amap by am_anon or by am_slots depending
	 * on which is cheaper.    decide now.
	 */

	if (slots < amap->am_nused) {
		byanon = TRUE;
		lcv = slotoff;
		stop = slotoff + slots;
	} else {
		byanon = FALSE;
		lcv = 0;
		stop = amap->am_nused;
		slotend = slotoff + slots;
	}

	while (lcv < stop) {
		int refs;

		if (byanon) {
			curslot = lcv++;	/* lcv advances here */
			if (amap->am_anon[curslot] == NULL)
				continue;
		} else {
			curslot = amap->am_slots[lcv];
			if (curslot < slotoff || curslot >= slotend) {
				lcv++;		/* lcv advances here */
				continue;
			}
			stop--;	/* drop stop, since anon will be removed */
		}
		anon = amap->am_anon[curslot];

		/*
		 * remove it from the amap
		 */
		amap->am_anon[curslot] = NULL;
		ptr = amap->am_bckptr[curslot];
		if (ptr != (amap->am_nused - 1)) {
			amap->am_slots[ptr] =
			    amap->am_slots[amap->am_nused - 1];
			amap->am_bckptr[amap->am_slots[ptr]] =
			    ptr;	/* back ptr. */
		}
		amap->am_nused--;

		/*
		 * drop anon reference count
		 */
		simple_lock(&anon->an_lock);
		refs = --anon->an_ref;
		simple_unlock(&anon->an_lock);
		if (refs == 0) {
			/*
			 * we just eliminated the last reference to an anon.
			 * free it.
			 */
			uvm_anfree(anon);
		}
	}
}

#endif

/*
 * amap_swap_off: pagein anonymous pages in amaps and drop swap slots.
 *
 * => called with swap_syscall_lock held.
 * => note that we don't always traverse all anons.
 *    e.g. amaps being wiped out, released anons.
 * => return TRUE if failed.
 */

boolean_t
amap_swap_off(int startslot, int endslot)
{
	struct vm_amap *am;
	struct vm_amap *am_next;
	struct vm_amap marker_prev;
	struct vm_amap marker_next;
	boolean_t rv = FALSE;

#if defined(DIAGNOSTIC)
	memset(&marker_prev, 0, sizeof(marker_prev));
	memset(&marker_next, 0, sizeof(marker_next));
#endif /* defined(DIAGNOSTIC) */

	for (am = LIST_FIRST(&amap_list); am != NULL && !rv; am = am_next) {
		int i;

		LIST_INSERT_BEFORE(am, &marker_prev, am_list);
		LIST_INSERT_AFTER(am, &marker_next, am_list);

		if (am->am_nused <= 0) {
			goto next;
		}

		for (i = 0; i < am->am_nused; i++) {
			int slot;
			int swslot;
			struct vm_anon *anon;

			slot = am->am_slots[i];
			anon = am->am_anon[slot];
			simple_lock(&anon->an_lock);

			swslot = anon->an_swslot;
			if (swslot < startslot || endslot <= swslot) {
				simple_unlock(&anon->an_lock);
				continue;
			}

			am->am_flags |= AMAP_SWAPOFF;

			rv = uvm_anon_pagein(anon);

			am->am_flags &= ~AMAP_SWAPOFF;
			if (amap_refs(am) == 0) {
				amap_wipeout(am);
				am = NULL;
				break;
			}
			if (rv) {
				break;
			}
			i = 0;
		}

next:
		KASSERT(LIST_NEXT(&marker_prev, am_list) == &marker_next ||
		    LIST_NEXT(LIST_NEXT(&marker_prev, am_list), am_list) ==
		    &marker_next);
		am_next = LIST_NEXT(&marker_next, am_list);
		LIST_REMOVE(&marker_prev, am_list);
		LIST_REMOVE(&marker_next, am_list);
	}

	return rv;
}
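
/*
 * note on the marker entries above (added commentary, not in the
 * original source): uvm_anon_pagein() can sleep, and amap_wipeout() can
 * remove "am" itself from amap_list, so the loop brackets "am" with two
 * marker entries to keep its place in the list.  the KASSERT verifies
 * that at most "am" vanished from between the markers, and am_next is
 * taken from after marker_next rather than from "am".  the inner loop
 * likewise restarts its scan (i = 0) after each successful pagein,
 * since the pagein may sleep and am_slots[] may have been reshuffled
 * in the meantime.
 */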

/*
 * amap_lookup: look up a page in an amap
 *
 * => amap should be locked by caller.
 */
struct vm_anon *
amap_lookup(struct vm_aref *aref, vaddr_t offset)
{
	int slot;
	struct vm_amap *amap = aref->ar_amap;
	UVMHIST_FUNC("amap_lookup"); UVMHIST_CALLED(maphist);

	AMAP_B2SLOT(slot, offset);
	slot += aref->ar_pageoff;

	if (slot >= amap->am_nslot)
		panic("amap_lookup: offset out of range");

	UVMHIST_LOG(maphist, "<- done (amap=%p, offset=0x%lx, result=%p)",
	    amap, offset, amap->am_anon[slot], 0);
	return(amap->am_anon[slot]);
}

/*
 * amap_lookups: look up a range of pages in an amap
 *
 * => amap should be locked by caller.
 * => XXXCDC: this interface is biased toward array-based amaps.  fix.
 */
void
amap_lookups(struct vm_aref *aref, vaddr_t offset,
    struct vm_anon **anons, int npages)
{
	int slot;
	struct vm_amap *amap = aref->ar_amap;
	UVMHIST_FUNC("amap_lookups"); UVMHIST_CALLED(maphist);

	AMAP_B2SLOT(slot, offset);
	slot += aref->ar_pageoff;

	UVMHIST_LOG(maphist, "  slot=%ld, npages=%ld, nslot=%ld", slot, npages,
	    amap->am_nslot, 0);

	if ((slot + (npages - 1)) >= amap->am_nslot)
		panic("amap_lookups: offset out of range");

	memcpy(anons, &amap->am_anon[slot], npages * sizeof(struct vm_anon *));

	UVMHIST_LOG(maphist, "<- done", 0, 0, 0, 0);
	return;
}

/*
 * amap_add: add (or replace) a page to an amap
 *
 * => caller must lock amap.
 * => if (replace) caller must lock anon because we might have to call
 *	pmap_page_protect on the anon's page.
 * => the given "offset" is the same one that is later passed to
 *	amap_unadd().
 */
void
amap_add(struct vm_aref *aref, vaddr_t offset, struct vm_anon *anon,
    boolean_t replace)
{
	int slot;
	struct vm_amap *amap = aref->ar_amap;
	UVMHIST_FUNC("amap_add"); UVMHIST_CALLED(maphist);

	AMAP_B2SLOT(slot, offset);
	slot += aref->ar_pageoff;

	if (slot >= amap->am_nslot)
		panic("amap_add: offset out of range");

	if (replace) {

		if (amap->am_anon[slot] == NULL)
			panic("amap_add: replacing null anon");
		if (amap->am_anon[slot]->an_page != NULL &&
		    (amap->am_flags & AMAP_SHARED) != 0) {
			pmap_page_protect(amap->am_anon[slot]->an_page,
			    VM_PROT_NONE);
			/*
			 * XXX: suppose page is supposed to be wired somewhere?
			 */
		}
	} else {	/* !replace */
		if (amap->am_anon[slot] != NULL)
			panic("amap_add: slot in use");

		amap->am_bckptr[slot] = amap->am_nused;
		amap->am_slots[amap->am_nused] = slot;
		amap->am_nused++;
	}
	amap->am_anon[slot] = anon;
	UVMHIST_LOG(maphist,
	    "<- done (amap=%p, offset=0x%lx, anon=%p, rep=%ld)",
	    amap, offset, anon, replace);
}

/*
 * amap_unadd: remove a page from an amap
 *
 * => caller must lock amap
 */
void
amap_unadd(struct vm_aref *aref, vaddr_t offset)
{
	int ptr, slot;
	struct vm_amap *amap = aref->ar_amap;
	UVMHIST_FUNC("amap_unadd"); UVMHIST_CALLED(maphist);

	AMAP_B2SLOT(slot, offset);
	slot += aref->ar_pageoff;

	if (slot >= amap->am_nslot)
		panic("amap_unadd: offset out of range");

	if (amap->am_anon[slot] == NULL)
		panic("amap_unadd: nothing there");

	amap->am_anon[slot] = NULL;
	ptr = amap->am_bckptr[slot];

	if (ptr != (amap->am_nused - 1)) {	/* swap to keep slots contig? */
		amap->am_slots[ptr] = amap->am_slots[amap->am_nused - 1];
		amap->am_bckptr[amap->am_slots[ptr]] = ptr;	/* back link */
	}
	amap->am_nused--;
	UVMHIST_LOG(maphist, "<- done (amap=%p, slot=%ld)", amap, slot,0, 0);
}
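
/*
 * illustrative note on the swap above (added commentary, not in the
 * original source): am_slots[0..am_nused-1] is a dense list of the
 * in-use slot numbers and am_bckptr[slot] is that slot's index within
 * am_slots[], which is what makes amap_unadd() and amap_wiperange()
 * O(1) per removal.  e.g. with am_slots = { 5, 9, 3 } and am_nused = 3,
 * removing slot 5 (ptr = 0) copies slot 3 into am_slots[0], sets
 * am_bckptr[3] = 0 and drops am_nused to 2.
 */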

/*
 * amap_ref: gain a reference to an amap
 *
 * => amap must not be locked (we will lock)
 * => "offset" and "len" are in units of pages
 * => called at fork time to gain the child's reference
 */
void
amap_ref(struct vm_amap *amap, vaddr_t offset, vsize_t len, int flags)
{
	UVMHIST_FUNC("amap_ref"); UVMHIST_CALLED(maphist);

	amap->am_ref++;
	if (flags & AMAP_SHARED)
		amap->am_flags |= AMAP_SHARED;
#ifdef UVM_AMAP_PPREF
	if (amap->am_ppref == NULL && (flags & AMAP_REFALL) == 0 &&
	    len != amap->am_nslot)
		amap_pp_establish(amap);
	if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
		if (flags & AMAP_REFALL)
			amap_pp_adjref(amap, 0, amap->am_nslot, 1);
		else
			amap_pp_adjref(amap, offset, len, 1);
	}
#endif
	UVMHIST_LOG(maphist,"<- done!  amap=%p", amap, 0, 0, 0);
}

/*
 * amap_unref: remove a reference to an amap
 *
 * => caller must remove all pmap-level references to this amap before
 *	dropping the reference
 * => called from uvm_unmap_detach [only] ... note that entry is no
 *	longer part of a map and thus has no need for locking
 * => amap must be unlocked (we will lock it).
 */
void
amap_unref(struct vm_amap *amap, vaddr_t offset, vsize_t len, boolean_t all)
{
	UVMHIST_FUNC("amap_unref"); UVMHIST_CALLED(maphist);

	UVMHIST_LOG(maphist,"  amap=%p  refs=%ld, nused=%ld",
	    amap, amap->am_ref, amap->am_nused, 0);

	/*
	 * if we are the last reference, free the amap and return.
	 */

	if (amap->am_ref-- == 1) {
		amap_wipeout(amap);	/* drops final ref and frees */
		UVMHIST_LOG(maphist,"<- done (was last ref)!", 0, 0, 0, 0);
		return;			/* no need to unlock */
	}

	/*
	 * otherwise just drop the reference count(s)
	 */
	if (amap->am_ref == 1 && (amap->am_flags & AMAP_SHARED) != 0)
		amap->am_flags &= ~AMAP_SHARED;	/* clear shared flag */
#ifdef UVM_AMAP_PPREF
	if (amap->am_ppref == NULL && all == 0 && len != amap->am_nslot)
		amap_pp_establish(amap);
	if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
		if (all)
			amap_pp_adjref(amap, 0, amap->am_nslot, -1);
		else
			amap_pp_adjref(amap, offset, len, -1);
	}
#endif

	UVMHIST_LOG(maphist,"<- done!", 0, 0, 0, 0);
}