/*	$OpenBSD: uvm_amap.c,v 1.46 2011/07/03 18:34:14 oga Exp $	*/
/*	$NetBSD: uvm_amap.c,v 1.27 2000/11/25 06:27:59 chs Exp $	*/

/*
 *
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by Charles D. Cranor and
 *      Washington University.
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * uvm_amap.c: amap operations
 */

/*
 * this file contains functions that perform operations on amaps.  see
 * uvm_amap.h for a brief explanation of the role of amaps in uvm.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/pool.h>

#include <uvm/uvm.h>
#include <uvm/uvm_swap.h>

/*
 * pool for allocation of vm_amap structures.  note that the pool has
 * its own simplelock for its protection.  also note that in order to
 * avoid an endless loop, the amap pool's allocator cannot allocate
 * memory from an amap (it currently goes through the kernel uobj, so
 * we are ok).
 */

struct pool uvm_amap_pool;

LIST_HEAD(, vm_amap) amap_list;

#define MALLOC_SLOT_UNIT (2 * sizeof(int) + sizeof(struct vm_anon *))

/*
 * local functions
 */

static struct vm_amap *amap_alloc1(int, int, int);
static __inline void amap_list_insert(struct vm_amap *);
static __inline void amap_list_remove(struct vm_amap *);

static __inline void
amap_list_insert(struct vm_amap *amap)
{
	LIST_INSERT_HEAD(&amap_list, amap, am_list);
}

static __inline void
amap_list_remove(struct vm_amap *amap)
{
	LIST_REMOVE(amap, am_list);
}

#ifdef UVM_AMAP_PPREF
/*
 * what is ppref?   ppref is an _optional_ amap feature which is used
 * to keep track of reference counts on a per-page basis.  it is enabled
 * when UVM_AMAP_PPREF is defined.
 *
 * when enabled, an array of ints is allocated for the pprefs.
 * this array is allocated only when a partial reference is added to the
 * map (either by unmapping part of the amap, or gaining a reference
 * to only a part of an amap).  if the malloc of the array fails
 * (M_NOWAIT), then we set the array pointer to PPREF_NONE to indicate
 * that we tried to do ppref's but couldn't alloc the array so just
 * give up (after all, this is an optional feature!).
 *
 * the array is divided into page sized "chunks."  for chunks of length 1,
 * the chunk reference count plus one is stored in that chunk's slot.
 * for chunks of length > 1 the first slot contains (the reference count
 * plus one) * -1.  [the negative value indicates that the length is
 * greater than one.]  the second slot of the chunk contains the length
 * of the chunk.  here is an example:
 *
 * actual REFS:  2  2  2  2  3  1  1  0  0  0  4  4  0  1  1  1
 *       ppref: -3  4  x  x  4 -2  2 -1  3  x -5  2  1 -2  3  x
 *              <----------><-><----><-------><----><-><------->
 * (x = don't care)
 *
 * this encoding allows one int to hold the ref count for a whole
 * chunk.  note that the "plus one" part is needed because a reference
 * count of zero is neither positive nor negative (we need a way to tell
 * whether we've got one zero or a run of them).
 *
 * here are some in-line functions to help us.
 */

static __inline void pp_getreflen(int *, int, int *, int *);
static __inline void pp_setreflen(int *, int, int, int);

/*
 * pp_getreflen: get the reference and length for a specific offset
 *
 * => ppref's amap must be locked
 */
static __inline void
pp_getreflen(int *ppref, int offset, int *refp, int *lenp)
{

	if (ppref[offset] > 0) {		/* chunk size must be 1 */
		*refp = ppref[offset] - 1;	/* don't forget to adjust */
		*lenp = 1;
	} else {
		*refp = (ppref[offset] * -1) - 1;
		*lenp = ppref[offset+1];
	}
}

/*
 * pp_setreflen: set the reference and length for a specific offset
 *
 * => ppref's amap must be locked
 */
static __inline void
pp_setreflen(int *ppref, int offset, int ref, int len)
{
	if (len == 1) {
		ppref[offset] = ref + 1;
	} else {
		ppref[offset] = (ref + 1) * -1;
		ppref[offset+1] = len;
	}
}
#endif

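#if 0
/*
 * Illustrative sketch (not part of the kernel build, and assuming
 * UVM_AMAP_PPREF): shows how the chunked encoding described above
 * round-trips through pp_setreflen() and pp_getreflen().  pp_example()
 * is a hypothetical helper, not an existing kernel function.  The
 * values encode the first two chunks of the example table: refs 2
 * over 4 slots, then ref 3 over 1 slot.
 */
static void
pp_example(void)
{
	int ppref[8];
	int ref, len;

	pp_setreflen(ppref, 0, 2, 4);	/* stores -3 at [0], 4 at [1] */
	pp_setreflen(ppref, 4, 3, 1);	/* stores 4 at [4]: length-1 chunk */

	pp_getreflen(ppref, 0, &ref, &len);	/* yields ref == 2, len == 4 */
	pp_getreflen(ppref, 4, &ref, &len);	/* yields ref == 3, len == 1 */
}
#endif
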
/*
 * amap_init: called at boot time to init global amap data structures
 */

void
amap_init(void)
{
	/*
	 * Initialize the vm_amap pool.
	 */
	pool_init(&uvm_amap_pool, sizeof(struct vm_amap), 0, 0, 0,
	    "amappl", &pool_allocator_nointr);
	pool_sethiwat(&uvm_amap_pool, 4096);
}

/*
 * amap_alloc1: internal function that allocates an amap, but does not
 *	init the overlay.
 *
 * => lock on returned amap is init'd
 */
static inline struct vm_amap *
amap_alloc1(int slots, int padslots, int waitf)
{
	struct vm_amap *amap;
	int totalslots;

	amap = pool_get(&uvm_amap_pool, (waitf == M_WAITOK) ? PR_WAITOK
	    : PR_NOWAIT);
	if (amap == NULL)
		return(NULL);

	totalslots = malloc_roundup((slots + padslots) * MALLOC_SLOT_UNIT) /
	    MALLOC_SLOT_UNIT;
	amap->am_ref = 1;
	amap->am_flags = 0;
#ifdef UVM_AMAP_PPREF
	amap->am_ppref = NULL;
#endif
	amap->am_maxslot = totalslots;
	amap->am_nslot = slots;
	amap->am_nused = 0;

	amap->am_slots = malloc(totalslots * MALLOC_SLOT_UNIT, M_UVMAMAP,
	    waitf);
	if (amap->am_slots == NULL)
		goto fail1;

	amap->am_bckptr = (int *)(((char *)amap->am_slots) + totalslots *
	    sizeof(int));
	amap->am_anon = (struct vm_anon **)(((char *)amap->am_bckptr) +
	    totalslots * sizeof(int));

	return(amap);

fail1:
	pool_put(&uvm_amap_pool, amap);
	return (NULL);
}

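/*
 * Illustrative layout note (a sketch, not from the original source):
 * the single malloc() in amap_alloc1() above is carved into three
 * parallel arrays, which is why MALLOC_SLOT_UNIT charges two ints plus
 * one anon pointer per slot.  For totalslots == N the layout is:
 *
 *	am_slots	base                      N ints
 *	am_bckptr	base + N*sizeof(int)      N ints
 *	am_anon		base + 2*N*sizeof(int)    N struct vm_anon *
 */
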
/*
 * amap_alloc: allocate an amap to manage "sz" bytes of anonymous VM
 *
 * => caller should ensure sz is a multiple of PAGE_SIZE
 * => reference count to new amap is set to one
 * => new amap is returned unlocked
 */

struct vm_amap *
amap_alloc(vaddr_t sz, vaddr_t padsz, int waitf)
{
	struct vm_amap *amap;
	int slots, padslots;

	AMAP_B2SLOT(slots, sz);		/* load slots */
	AMAP_B2SLOT(padslots, padsz);

	amap = amap_alloc1(slots, padslots, waitf);
	if (amap) {
		memset(amap->am_anon, 0,
		    amap->am_maxslot * sizeof(struct vm_anon *));
		amap_list_insert(amap);
	}

	return(amap);
}


/*
 * amap_free: free an amap
 *
 * => the amap must be locked (mainly for simplelock accounting)
 * => the amap should have a zero reference count and be empty
 */
void
amap_free(struct vm_amap *amap)
{

	KASSERT(amap->am_ref == 0 && amap->am_nused == 0);
	KASSERT((amap->am_flags & AMAP_SWAPOFF) == 0);

	free(amap->am_slots, M_UVMAMAP);
#ifdef UVM_AMAP_PPREF
	if (amap->am_ppref && amap->am_ppref != PPREF_NONE)
		free(amap->am_ppref, M_UVMAMAP);
#endif
	pool_put(&uvm_amap_pool, amap);

}

/*
 * amap_extend: extend the size of an amap (if needed)
 *
 * => called from uvm_map when we want to extend an amap to cover
 *    a new mapping (rather than allocate a new one)
 * => amap should be unlocked (we will lock it)
 * => to safely extend an amap it should have a reference count of
 *    one (thus it can't be shared)
 * => XXXCDC: support padding at this level?
 */
int
amap_extend(struct vm_map_entry *entry, vsize_t addsize)
{
	struct vm_amap *amap = entry->aref.ar_amap;
	int slotoff = entry->aref.ar_pageoff;
	int slotmapped, slotadd, slotneed, slotalloc;
#ifdef UVM_AMAP_PPREF
	int *newppref, *oldppref;
#endif
	u_int *newsl, *newbck, *oldsl, *oldbck;
	struct vm_anon **newover, **oldover;
	int slotadded;

	/*
	 * first, determine how many slots we need in the amap.  don't
	 * forget that ar_pageoff could be non-zero: this means that
	 * there are some unused slots before us in the amap.
	 */

	AMAP_B2SLOT(slotmapped, entry->end - entry->start); /* slots mapped */
	AMAP_B2SLOT(slotadd, addsize);			/* slots to add */
	slotneed = slotoff + slotmapped + slotadd;

	/*
	 * case 1: we already have enough slots in the map and thus
	 * only need to bump the reference counts on the slots we are
	 * adding.
	 */

	if (amap->am_nslot >= slotneed) {
#ifdef UVM_AMAP_PPREF
		if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
			amap_pp_adjref(amap, slotoff + slotmapped, slotadd, 1);
		}
#endif
		return (0);
	}

	/*
	 * case 2: we pre-allocated slots for use and we just need to
	 * bump nslot up to account for these slots.
	 */

	if (amap->am_maxslot >= slotneed) {
#ifdef UVM_AMAP_PPREF
		if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
			if ((slotoff + slotmapped) < amap->am_nslot)
				amap_pp_adjref(amap, slotoff + slotmapped,
				    (amap->am_nslot - (slotoff + slotmapped)),
				    1);
			pp_setreflen(amap->am_ppref, amap->am_nslot, 1,
			    slotneed - amap->am_nslot);
		}
#endif
		amap->am_nslot = slotneed;

		/*
		 * no need to zero am_anon since that was done at
		 * alloc time and we never shrink an allocation.
		 */
		return (0);
	}

	/*
	 * case 3: we need to malloc a new amap and copy all the amap
	 * data over from old amap to the new one.
	 *
	 * XXXCDC: could we take advantage of a kernel realloc()?
	 */

	if (slotneed >= UVM_AMAP_LARGE)
		return E2BIG;

	slotalloc = malloc_roundup(slotneed * MALLOC_SLOT_UNIT) /
	    MALLOC_SLOT_UNIT;
#ifdef UVM_AMAP_PPREF
	newppref = NULL;
	if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
		newppref = malloc(slotalloc * sizeof(int), M_UVMAMAP,
		    M_WAITOK | M_CANFAIL);
		if (newppref == NULL) {
			/* give up if malloc fails */
			free(amap->am_ppref, M_UVMAMAP);
			amap->am_ppref = PPREF_NONE;
		}
	}
#endif
	newsl = malloc(slotalloc * MALLOC_SLOT_UNIT, M_UVMAMAP,
	    M_WAITOK | M_CANFAIL);
	if (newsl == NULL) {
#ifdef UVM_AMAP_PPREF
		if (newppref != NULL) {
			free(newppref, M_UVMAMAP);
		}
#endif
		return (ENOMEM);
	}
	newbck = (int *)(((char *)newsl) + slotalloc * sizeof(int));
	newover = (struct vm_anon **)(((char *)newbck) + slotalloc *
	    sizeof(int));
	KASSERT(amap->am_maxslot < slotneed);

	/*
	 * now copy everything over to new malloc'd areas...
	 */

	slotadded = slotalloc - amap->am_nslot;

	/* do am_slots */
	oldsl = amap->am_slots;
	memcpy(newsl, oldsl, sizeof(int) * amap->am_nused);
	amap->am_slots = newsl;

	/* do am_anon */
	oldover = amap->am_anon;
	memcpy(newover, oldover, sizeof(struct vm_anon *) * amap->am_nslot);
	memset(newover + amap->am_nslot, 0, sizeof(struct vm_anon *) *
	    slotadded);
	amap->am_anon = newover;

	/* do am_bckptr */
	oldbck = amap->am_bckptr;
	memcpy(newbck, oldbck, sizeof(int) * amap->am_nslot);
	memset(newbck + amap->am_nslot, 0, sizeof(int) * slotadded); /* XXX: needed? */
	amap->am_bckptr = newbck;

#ifdef UVM_AMAP_PPREF
	/* do ppref */
	oldppref = amap->am_ppref;
	if (newppref) {
		memcpy(newppref, oldppref, sizeof(int) * amap->am_nslot);
		memset(newppref + amap->am_nslot, 0, sizeof(int) * slotadded);
		amap->am_ppref = newppref;
		if ((slotoff + slotmapped) < amap->am_nslot)
			amap_pp_adjref(amap, slotoff + slotmapped,
			    (amap->am_nslot - (slotoff + slotmapped)), 1);
		pp_setreflen(newppref, amap->am_nslot, 1,
		    slotneed - amap->am_nslot);
	}
#endif

	/* update master values */
	amap->am_nslot = slotneed;
	amap->am_maxslot = slotalloc;

	/* and free */
	free(oldsl, M_UVMAMAP);
#ifdef UVM_AMAP_PPREF
	if (oldppref && oldppref != PPREF_NONE)
		free(oldppref, M_UVMAMAP);
#endif
	return (0);
}

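/*
 * Worked example of the three cases above (illustrative numbers only,
 * assuming 4k pages): an amap with am_nslot == 8, am_maxslot == 16,
 * ar_pageoff == 0 and a 4-slot mapping (slotmapped == 4) being
 * extended by addsize:
 *
 *	addsize ==  4 pages -> slotneed ==  8 <= am_nslot:   case 1
 *	addsize ==  8 pages -> slotneed == 12 <= am_maxslot: case 2
 *	addsize == 16 pages -> slotneed == 20 >  am_maxslot: case 3
 */
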
/*
 * amap_share_protect: change protection of anons in a shared amap
 *
 * for shared amaps, given the current data structure layout, it is
 * not possible for us to directly locate all maps referencing the
 * shared anon (to change the protection).  in order to protect data
 * in shared maps we use pmap_page_protect().  [this is useful for IPC
 * mechanisms like map entry passing that may want to write-protect
 * all mappings of a shared amap.]  we traverse am_anon or am_slots
 * depending on the current state of the amap.
 *
 * => entry's map and amap must be locked by the caller
 */
void
amap_share_protect(struct vm_map_entry *entry, vm_prot_t prot)
{
	struct vm_amap *amap = entry->aref.ar_amap;
	int slots, lcv, slot, stop;

	AMAP_B2SLOT(slots, (entry->end - entry->start));
	stop = entry->aref.ar_pageoff + slots;

	if (slots < amap->am_nused) {
		/* cheaper to traverse am_anon */
		for (lcv = entry->aref.ar_pageoff ; lcv < stop ; lcv++) {
			if (amap->am_anon[lcv] == NULL)
				continue;
			if (amap->am_anon[lcv]->an_page != NULL)
				pmap_page_protect(amap->am_anon[lcv]->an_page,
				    prot);
		}
		return;
	}

	/* cheaper to traverse am_slots */
	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
		slot = amap->am_slots[lcv];
		if (slot < entry->aref.ar_pageoff || slot >= stop)
			continue;
		if (amap->am_anon[slot]->an_page != NULL)
			pmap_page_protect(amap->am_anon[slot]->an_page, prot);
	}
	return;
}

/*
 * amap_wipeout: wipeout all anon's in an amap; then free the amap!
 *
 * => called from amap_unref when the final reference to an amap is
 *	discarded (i.e. when the reference count drops to zero)
 * => the amap should be locked (by the caller)
 */

void
amap_wipeout(struct vm_amap *amap)
{
	int lcv, slot;
	struct vm_anon *anon;

	KASSERT(amap->am_ref == 0);

	if (__predict_false((amap->am_flags & AMAP_SWAPOFF) != 0)) {
		/*
		 * amap_swap_off will call us again.
		 */
		return;
	}
	amap_list_remove(amap);

	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
		int refs;

		slot = amap->am_slots[lcv];
		anon = amap->am_anon[slot];

		if (anon == NULL || anon->an_ref == 0)
			panic("amap_wipeout: corrupt amap");

		simple_lock(&anon->an_lock);	/* lock anon */

		refs = --anon->an_ref;
		simple_unlock(&anon->an_lock);
		if (refs == 0) {
			/*
			 * we had the last reference to a vm_anon.  free it.
			 */
			uvm_anfree(anon);
		}
	}

	/*
	 * now we free the map
	 */

	amap->am_ref = 0;	/* ... was one */
	amap->am_nused = 0;
	amap_free(amap);	/* will unlock and free amap */
}

/*
 * amap_copy: ensure that a map entry's "needs_copy" flag is false
 *	by copying the amap if necessary.
 *
 * => an entry with a null amap pointer will get a new (blank) one.
 * => the map that the map entry belongs to must be locked by caller.
 * => the amap currently attached to "entry" (if any) must be unlocked.
 * => if canchunk is true, then we may clip the entry into a chunk
 * => "startva" and "endva" are used only if canchunk is true.  they are
 *	used to limit chunking (e.g. if you have a large space that you
 *	know you are going to need to allocate amaps for, there is no point
 *	in allowing that to be chunked)
 */

void
amap_copy(struct vm_map *map, struct vm_map_entry *entry, int waitf,
    boolean_t canchunk, vaddr_t startva, vaddr_t endva)
{
	struct vm_amap *amap, *srcamap;
	int slots, lcv;
	vaddr_t chunksize;

	/*
	 * is there an amap to copy?  if not, create one from scratch.
	 */

	if (entry->aref.ar_amap == NULL) {

		/*
		 * check to see if we have a large amap that we can
		 * chunk.  we align startva/endva to chunk-sized
		 * boundaries and then clip to them.
		 */

		if (canchunk && atop(entry->end - entry->start) >=
		    UVM_AMAP_LARGE) {
			/* convert slots to bytes */
			chunksize = UVM_AMAP_CHUNK << PAGE_SHIFT;
			startva = (startva / chunksize) * chunksize;
			endva = roundup(endva, chunksize);
			UVM_MAP_CLIP_START(map, entry, startva);
			/* watch out for endva wrap-around! */
			if (endva >= startva)
				UVM_MAP_CLIP_END(map, entry, endva);
		}

		entry->aref.ar_pageoff = 0;
		entry->aref.ar_amap = amap_alloc(entry->end - entry->start, 0,
		    waitf);
		if (entry->aref.ar_amap != NULL)
			entry->etype &= ~UVM_ET_NEEDSCOPY;
		return;
	}

	/*
	 * first check and see if we are the only map entry
	 * referencing the amap we currently have.  if so, then we can
	 * just take it over rather than copying it.  note that we are
	 * reading am_ref with the amap unlocked... the value can only
	 * be one if we have the only reference to the amap (via our
	 * locked map).  if the value is greater than one we fall
	 * through to the next case (where we double check the value).
	 */

	if (entry->aref.ar_amap->am_ref == 1) {
		entry->etype &= ~UVM_ET_NEEDSCOPY;
		return;
	}

	/*
	 * looks like we need to copy the map.
	 */

	AMAP_B2SLOT(slots, entry->end - entry->start);
	amap = amap_alloc1(slots, 0, waitf);
	if (amap == NULL)
		return;
	srcamap = entry->aref.ar_amap;

	/*
	 * need to double check reference count now that we've got the
	 * src amap locked down.  the reference count could have
	 * changed while we were in malloc.  if the reference count
	 * dropped down to one we take over the old map rather than
	 * copying the amap.
	 */

	if (srcamap->am_ref == 1) {		/* take it over? */
		entry->etype &= ~UVM_ET_NEEDSCOPY;
		amap->am_ref--;		/* drop final reference to map */
		amap_free(amap);	/* dispose of new (unused) amap */
		return;
	}

	/*
	 * we must copy it now.
	 */

	for (lcv = 0 ; lcv < slots; lcv++) {
		amap->am_anon[lcv] =
		    srcamap->am_anon[entry->aref.ar_pageoff + lcv];
		if (amap->am_anon[lcv] == NULL)
			continue;
		simple_lock(&amap->am_anon[lcv]->an_lock);
		amap->am_anon[lcv]->an_ref++;
		simple_unlock(&amap->am_anon[lcv]->an_lock);
		amap->am_bckptr[lcv] = amap->am_nused;
		amap->am_slots[amap->am_nused] = lcv;
		amap->am_nused++;
	}
	memset(&amap->am_anon[lcv], 0,
	    (amap->am_maxslot - lcv) * sizeof(struct vm_anon *));

	/*
	 * drop our reference to the old amap (srcamap) and unlock.
	 * we know that the reference count on srcamap is greater than
	 * one (we checked above), so there is no way we could drop
	 * the count to zero.  [and no need to worry about freeing it]
	 */

	srcamap->am_ref--;
	if (srcamap->am_ref == 1 && (srcamap->am_flags & AMAP_SHARED) != 0)
		srcamap->am_flags &= ~AMAP_SHARED;   /* clear shared flag */
#ifdef UVM_AMAP_PPREF
	if (srcamap->am_ppref && srcamap->am_ppref != PPREF_NONE) {
		amap_pp_adjref(srcamap, entry->aref.ar_pageoff,
		    (entry->end - entry->start) >> PAGE_SHIFT, -1);
	}
#endif

	/*
	 * install new amap.
	 */

	entry->aref.ar_pageoff = 0;
	entry->aref.ar_amap = amap;
	entry->etype &= ~UVM_ET_NEEDSCOPY;

	amap_list_insert(amap);
}

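/*
 * Worked example of the chunk alignment in amap_copy() above
 * (illustrative numbers only, assuming UVM_AMAP_CHUNK == 16 and 4k
 * pages, so chunksize == 64k): a startva of 0x12345000 rounds down to
 * 0x12340000 and an endva of 0x12356000 rounds up to 0x12360000, so
 * the entry is clipped to a 128k chunk instead of allocating an amap
 * for the whole range up front.
 */
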
/*
 * amap_cow_now: resolve all copy-on-write faults in an amap now for fork(2)
 *
 *	called during fork(2) when the parent process has a wired map
 *	entry.  in that case we want to avoid write-protecting pages
 *	in the parent's map (e.g. like what you'd do for a COW page)
 *	so we resolve the COW here.
 *
 * => assume parent's entry was wired, thus all pages are resident.
 * => assume pages that are loaned out (loan_count) are already mapped
 *	read-only in all maps, and thus no need for us to worry about them
 * => assume both parent and child vm_map's are locked
 * => caller passes child's map/entry in to us
 * => if we run out of memory we will unlock the amap and sleep _with_ the
 *	parent and child vm_map's locked(!).  we have to do this since
 *	we are in the middle of a fork(2) and we can't let the parent
 *	map change until we are done copying all the map entries.
 * => XXXCDC: out of memory should cause fork to fail, but there is
 *	currently no easy way to do this (needs fix)
 * => page queues must be unlocked (we may lock them)
 */

void
amap_cow_now(struct vm_map *map, struct vm_map_entry *entry)
{
	struct vm_amap *amap = entry->aref.ar_amap;
	int lcv, slot;
	struct vm_anon *anon, *nanon;
	struct vm_page *pg, *npg;

	/*
	 * note that if we unlock the amap then we must ReStart the "lcv" for
	 * loop because some other process could reorder the anon's in the
	 * am_anon[] array on us while the lock is dropped.
	 */
ReStart:
	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {

		/*
		 * get the page
		 */

		slot = amap->am_slots[lcv];
		anon = amap->am_anon[slot];
		simple_lock(&anon->an_lock);
		pg = anon->an_page;

		/*
		 * page must be resident since parent is wired
		 */

		if (pg == NULL)
			panic("amap_cow_now: non-resident wired page in anon %p",
			    anon);

		/*
		 * if the anon ref count is one and the page is not loaned,
		 * then we are safe (the child has exclusive access to the
		 * page).  if the page is loaned, then it must already be
		 * mapped read-only.
		 *
		 * we only need to get involved when these are not true.
		 * [note: if loan_count == 0, then the anon must own the page]
		 */

		if (anon->an_ref > 1 && pg->loan_count == 0) {

			/*
			 * if the page is busy then we have to unlock, wait for
			 * it and then restart.
			 */
			if (pg->pg_flags & PG_BUSY) {
				atomic_setbits_int(&pg->pg_flags, PG_WANTED);
				UVM_UNLOCK_AND_WAIT(pg, &anon->an_lock, FALSE,
				    "cownow", 0);
				goto ReStart;
			}

			/*
			 * ok, time to do a copy-on-write to a new anon
			 */
			nanon = uvm_analloc();
			if (nanon) {
				npg = uvm_pagealloc(NULL, 0, nanon, 0);
			} else
				npg = NULL;	/* XXX: quiet gcc warning */

			if (nanon == NULL || npg == NULL) {
				/* out of memory */
				/*
				 * XXXCDC: we should cause fork to fail, but
				 * we can't ...
				 */
				if (nanon) {
					simple_lock(&nanon->an_lock);
					uvm_anfree(nanon);
				}
				simple_unlock(&anon->an_lock);
				uvm_wait("cownowpage");
				goto ReStart;
			}

			/*
			 * got it... now we can copy the data and replace anon
			 * with our new one...
			 */
			uvm_pagecopy(pg, npg);		/* old -> new */
			anon->an_ref--;			/* can't drop to zero */
			amap->am_anon[slot] = nanon;	/* replace */

			/*
			 * drop PG_BUSY on new page ... since we have had its
			 * owner locked the whole time it can't be
			 * PG_RELEASED | PG_WANTED.
			 */
			atomic_clearbits_int(&npg->pg_flags, PG_BUSY|PG_FAKE);
			UVM_PAGE_OWN(npg, NULL);
			uvm_lock_pageq();
			uvm_pageactivate(npg);
			uvm_unlock_pageq();
		}

		simple_unlock(&anon->an_lock);
		/*
		 * done with this anon, next ...!
		 */

	}	/* end of 'for' loop */
}

/*
 * amap_splitref: split a single reference into two separate references
 *
 * => called from uvm_map's clip routines
 * => origref's map should be locked
 * => origref->ar_amap should be unlocked (we will lock)
 */
void
amap_splitref(struct vm_aref *origref, struct vm_aref *splitref, vaddr_t offset)
{
	int leftslots;

	AMAP_B2SLOT(leftslots, offset);
	if (leftslots == 0)
		panic("amap_splitref: split at zero offset");

	/*
	 * make sure the split point leaves slots on both sides.
	 */

	if (origref->ar_amap->am_nslot - origref->ar_pageoff - leftslots <= 0)
		panic("amap_splitref: map size check failed");

#ifdef UVM_AMAP_PPREF
	/*
	 * establish ppref before we add a duplicate reference to the amap
	 */
	if (origref->ar_amap->am_ppref == NULL)
		amap_pp_establish(origref->ar_amap);
#endif

	splitref->ar_amap = origref->ar_amap;
	splitref->ar_amap->am_ref++;		/* not a share reference */
	splitref->ar_pageoff = origref->ar_pageoff + leftslots;
}

#ifdef UVM_AMAP_PPREF

/*
 * amap_pp_establish: add a ppref array to an amap, if possible
 *
 * => amap locked by caller
 */
void
amap_pp_establish(struct vm_amap *amap)
{

	amap->am_ppref = malloc(sizeof(int) * amap->am_maxslot,
	    M_UVMAMAP, M_NOWAIT|M_ZERO);

	/*
	 * if we fail then we just won't use ppref for this amap
	 */
	if (amap->am_ppref == NULL) {
		amap->am_ppref = PPREF_NONE;	/* not using it */
		return;
	}

	/*
	 * init ppref
	 */
	pp_setreflen(amap->am_ppref, 0, amap->am_ref, amap->am_nslot);
}

/*
 * amap_pp_adjref: adjust reference count to a part of an amap using the
 * per-page reference count array.
 *
 * => map and amap locked by caller
 * => caller must check that ppref != PPREF_NONE before calling
 */
void
amap_pp_adjref(struct vm_amap *amap, int curslot, vsize_t slotlen, int adjval)
{
	int stopslot, *ppref, lcv, prevlcv;
	int ref, len, prevref, prevlen;

	stopslot = curslot + slotlen;
	ppref = amap->am_ppref;
	prevlcv = 0;

	/*
	 * first advance to the correct place in the ppref array,
	 * fragment if needed.
	 */

	for (lcv = 0 ; lcv < curslot ; lcv += len) {
		pp_getreflen(ppref, lcv, &ref, &len);
		if (lcv + len > curslot) {	/* goes past start? */
			pp_setreflen(ppref, lcv, ref, curslot - lcv);
			pp_setreflen(ppref, curslot, ref,
			    len - (curslot - lcv));
			len = curslot - lcv;	/* new length of entry @ lcv */
		}
		prevlcv = lcv;
	}
	if (lcv != 0)
		pp_getreflen(ppref, prevlcv, &prevref, &prevlen);
	else {
		/* Ensure that the "prevref == ref" test below always
		 * fails, since we're starting from the beginning of
		 * the ppref array; that is, there is no previous
		 * chunk.
		 */
		prevref = -1;
		prevlen = 0;
	}

	/*
	 * now adjust reference counts in range.  merge the first
	 * changed entry with the last unchanged entry if possible.
	 */

	if (lcv != curslot)
		panic("amap_pp_adjref: overshot target");

	for (/* lcv already set */; lcv < stopslot ; lcv += len) {
		pp_getreflen(ppref, lcv, &ref, &len);
		if (lcv + len > stopslot) {	/* goes past end? */
			pp_setreflen(ppref, lcv, ref, stopslot - lcv);
			pp_setreflen(ppref, stopslot, ref,
			    len - (stopslot - lcv));
			len = stopslot - lcv;
		}
		ref += adjval;
		if (ref < 0)
			panic("amap_pp_adjref: negative reference count");
		if (lcv == prevlcv + prevlen && ref == prevref) {
			pp_setreflen(ppref, prevlcv, ref, prevlen + len);
		} else {
			pp_setreflen(ppref, lcv, ref, len);
		}
		if (ref == 0)
			amap_wiperange(amap, lcv, len);
	}

}

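/*
 * Worked example for amap_pp_adjref() above (illustrative numbers
 * only): starting from a single chunk <ref 1, len 8> (ppref: -2 8),
 * adding a reference to slots 2..5 (curslot 2, slotlen 4, adjval +1)
 * first fragments the chunk to <1,2><1,6>, then adjusts the middle to
 * give <1,2><2,4><1,2>, i.e. ppref becomes: -2 2 -3 4 -2 2.
 */
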
/*
 * amap_wiperange: wipe out a range of an amap
 * [different from amap_wipeout because the amap is kept intact]
 *
 * => both map and amap must be locked by caller.
 */
void
amap_wiperange(struct vm_amap *amap, int slotoff, int slots)
{
	int byanon, lcv, stop, curslot, ptr, slotend;
	struct vm_anon *anon;

	/*
	 * we can either traverse the amap by am_anon or by am_slots depending
	 * on which is cheaper.  decide now.
	 */

	if (slots < amap->am_nused) {
		byanon = TRUE;
		lcv = slotoff;
		stop = slotoff + slots;
	} else {
		byanon = FALSE;
		lcv = 0;
		stop = amap->am_nused;
		slotend = slotoff + slots;
	}

	while (lcv < stop) {
		int refs;

		if (byanon) {
			curslot = lcv++;	/* lcv advances here */
			if (amap->am_anon[curslot] == NULL)
				continue;
		} else {
			curslot = amap->am_slots[lcv];
			if (curslot < slotoff || curslot >= slotend) {
				lcv++;		/* lcv advances here */
				continue;
			}
			stop--;	/* drop stop, since anon will be removed */
		}
		anon = amap->am_anon[curslot];

		/*
		 * remove it from the amap
		 */
		amap->am_anon[curslot] = NULL;
		ptr = amap->am_bckptr[curslot];
		if (ptr != (amap->am_nused - 1)) {
			amap->am_slots[ptr] =
			    amap->am_slots[amap->am_nused - 1];
			amap->am_bckptr[amap->am_slots[ptr]] =
			    ptr;	/* back ptr. */
		}
		amap->am_nused--;

		/*
		 * drop anon reference count
		 */
		simple_lock(&anon->an_lock);
		refs = --anon->an_ref;
		simple_unlock(&anon->an_lock);
		if (refs == 0) {
			/*
			 * we just eliminated the last reference to an anon.
			 * free it.
			 */
			uvm_anfree(anon);
		}
	}
}

#endif

/*
 * amap_swap_off: pagein anonymous pages in amaps and drop swap slots.
 *
 * => called with swap_syscall_lock held.
 * => note that we don't always traverse all anons.
 *    eg. amaps being wiped out, released anons.
 * => return TRUE if failed.
 */

boolean_t
amap_swap_off(int startslot, int endslot)
{
	struct vm_amap *am;
	struct vm_amap *am_next;
	struct vm_amap marker_prev;
	struct vm_amap marker_next;
	boolean_t rv = FALSE;

#if defined(DIAGNOSTIC)
	memset(&marker_prev, 0, sizeof(marker_prev));
	memset(&marker_next, 0, sizeof(marker_next));
#endif /* defined(DIAGNOSTIC) */

	for (am = LIST_FIRST(&amap_list); am != NULL && !rv; am = am_next) {
		int i;

		LIST_INSERT_BEFORE(am, &marker_prev, am_list);
		LIST_INSERT_AFTER(am, &marker_next, am_list);

		if (am->am_nused <= 0) {
			goto next;
		}

		for (i = 0; i < am->am_nused; i++) {
			int slot;
			int swslot;
			struct vm_anon *anon;

			slot = am->am_slots[i];
			anon = am->am_anon[slot];
			simple_lock(&anon->an_lock);

			swslot = anon->an_swslot;
			if (swslot < startslot || endslot <= swslot) {
				simple_unlock(&anon->an_lock);
				continue;
			}

			am->am_flags |= AMAP_SWAPOFF;

			rv = uvm_anon_pagein(anon);

			am->am_flags &= ~AMAP_SWAPOFF;
			if (amap_refs(am) == 0) {
				amap_wipeout(am);
				am = NULL;
				break;
			}
			if (rv) {
				break;
			}
			i = 0;
		}

next:
		KASSERT(LIST_NEXT(&marker_prev, am_list) == &marker_next ||
		    LIST_NEXT(LIST_NEXT(&marker_prev, am_list), am_list) ==
		    &marker_next);
		am_next = LIST_NEXT(&marker_next, am_list);
		LIST_REMOVE(&marker_prev, am_list);
		LIST_REMOVE(&marker_next, am_list);
	}

	return rv;
}

/*
 * amap_lookup: look up a page in an amap
 *
 * => amap should be locked by caller.
 */
struct vm_anon *
amap_lookup(struct vm_aref *aref, vaddr_t offset)
{
	int slot;
	struct vm_amap *amap = aref->ar_amap;

	AMAP_B2SLOT(slot, offset);
	slot += aref->ar_pageoff;

	if (slot >= amap->am_nslot)
		panic("amap_lookup: offset out of range");

	return(amap->am_anon[slot]);
}

/*
 * amap_lookups: look up a range of pages in an amap
 *
 * => amap should be locked by caller.
 * => XXXCDC: this interface is biased toward array-based amaps.  fix.
 */
void
amap_lookups(struct vm_aref *aref, vaddr_t offset,
    struct vm_anon **anons, int npages)
{
	int slot;
	struct vm_amap *amap = aref->ar_amap;

	AMAP_B2SLOT(slot, offset);
	slot += aref->ar_pageoff;

	if ((slot + (npages - 1)) >= amap->am_nslot)
		panic("amap_lookups: offset out of range");

	memcpy(anons, &amap->am_anon[slot], npages * sizeof(struct vm_anon *));

	return;
}

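/*
 * Worked example for the lookup functions above (illustrative numbers
 * only, assuming 4k pages): with aref->ar_pageoff == 2 and
 * offset == 0x3000, AMAP_B2SLOT yields 3, so amap_lookup() consults
 * am_anon[5].
 */
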
/*
 * amap_add: add (or replace) a page to an amap
 *
 * => caller must lock amap.
 * => if (replace) caller must lock anon because we might have to call
 *	pmap_page_protect on the anon's page.
 */
void
amap_add(struct vm_aref *aref, vaddr_t offset, struct vm_anon *anon,
    boolean_t replace)
{
	int slot;
	struct vm_amap *amap = aref->ar_amap;

	AMAP_B2SLOT(slot, offset);
	slot += aref->ar_pageoff;

	if (slot >= amap->am_nslot)
		panic("amap_add: offset out of range");

	if (replace) {

		if (amap->am_anon[slot] == NULL)
			panic("amap_add: replacing null anon");
		if (amap->am_anon[slot]->an_page != NULL &&
		    (amap->am_flags & AMAP_SHARED) != 0) {
			pmap_page_protect(amap->am_anon[slot]->an_page,
			    VM_PROT_NONE);
			/*
			 * XXX: suppose page is supposed to be wired somewhere?
			 */
		}
	} else {	/* !replace */
		if (amap->am_anon[slot] != NULL)
			panic("amap_add: slot in use");

		amap->am_bckptr[slot] = amap->am_nused;
		amap->am_slots[amap->am_nused] = slot;
		amap->am_nused++;
	}
	amap->am_anon[slot] = anon;
}

/*
 * amap_unadd: remove a page from an amap
 *
 * => caller must lock amap
 */
void
amap_unadd(struct vm_aref *aref, vaddr_t offset)
{
	int ptr, slot;
	struct vm_amap *amap = aref->ar_amap;

	AMAP_B2SLOT(slot, offset);
	slot += aref->ar_pageoff;

	if (slot >= amap->am_nslot)
		panic("amap_unadd: offset out of range");

	if (amap->am_anon[slot] == NULL)
		panic("amap_unadd: nothing there");

	amap->am_anon[slot] = NULL;
	ptr = amap->am_bckptr[slot];

	if (ptr != (amap->am_nused - 1)) {	/* swap to keep slots contig? */
		amap->am_slots[ptr] = amap->am_slots[amap->am_nused - 1];
		amap->am_bckptr[amap->am_slots[ptr]] = ptr;	/* back link */
	}
	amap->am_nused--;
}

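#if 0
/*
 * Illustrative sketch (not part of the kernel build): checks the
 * invariant maintained by amap_add()/amap_unadd() above --
 * am_slots[0..am_nused-1] is a dense list of the slots holding anons,
 * and am_bckptr[slot] records where each slot lives in that list, so
 * removal is O(1) via the swap with the last entry.
 * amap_check_dense() is a hypothetical helper, not an existing kernel
 * function.
 */
static void
amap_check_dense(struct vm_amap *amap)
{
	int i;

	for (i = 0; i < amap->am_nused; i++) {
		KASSERT(amap->am_anon[amap->am_slots[i]] != NULL);
		KASSERT(amap->am_bckptr[amap->am_slots[i]] == i);
	}
}
#endif
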
/*
 * amap_ref: gain a reference to an amap
 *
 * => amap must not be locked (we will lock)
 * => "offset" and "len" are in units of pages
 * => called at fork time to gain the child's reference
 */
void
amap_ref(struct vm_amap *amap, vaddr_t offset, vsize_t len, int flags)
{

	amap->am_ref++;
	if (flags & AMAP_SHARED)
		amap->am_flags |= AMAP_SHARED;
#ifdef UVM_AMAP_PPREF
	if (amap->am_ppref == NULL && (flags & AMAP_REFALL) == 0 &&
	    len != amap->am_nslot)
		amap_pp_establish(amap);
	if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
		if (flags & AMAP_REFALL)
			amap_pp_adjref(amap, 0, amap->am_nslot, 1);
		else
			amap_pp_adjref(amap, offset, len, 1);
	}
#endif
}

/*
 * amap_unref: remove a reference to an amap
 *
 * => caller must remove all pmap-level references to this amap before
 *	dropping the reference
 * => called from uvm_unmap_detach [only] ... note that entry is no
 *	longer part of a map and thus has no need for locking
 * => amap must be unlocked (we will lock it).
 */
void
amap_unref(struct vm_amap *amap, vaddr_t offset, vsize_t len, boolean_t all)
{

	/*
	 * if we are the last reference, free the amap and return.
	 */

	if (amap->am_ref-- == 1) {
		amap_wipeout(amap);	/* drops final ref and frees */
		return;			/* no need to unlock */
	}

	/*
	 * otherwise just drop the reference count(s)
	 */
	if (amap->am_ref == 1 && (amap->am_flags & AMAP_SHARED) != 0)
		amap->am_flags &= ~AMAP_SHARED;	/* clear shared flag */
#ifdef UVM_AMAP_PPREF
	if (amap->am_ppref == NULL && all == 0 && len != amap->am_nslot)
		amap_pp_establish(amap);
	if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
		if (all)
			amap_pp_adjref(amap, 0, amap->am_nslot, -1);
		else
			amap_pp_adjref(amap, offset, len, -1);
	}
#endif
}

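/*
 * Worked example for amap_unref() above (illustrative numbers only):
 * dropping the first 4 slots of a 16-slot amap with am_ref == 2 via
 * amap_unref(amap, 0, 4, FALSE) first decrements am_ref to 1.  Since
 * len != am_nslot this is a partial drop, so a ppref array is
 * established recording <ref 1, len 16> (the count after the drop),
 * and amap_pp_adjref(amap, 0, 4, -1) then leaves
 * <ref 0, len 4><ref 1, len 12>, wiping the anons in the
 * now-unreferenced first 4 slots via amap_wiperange().
 */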