/*	$OpenBSD: uvm_aobj.c,v 1.104 2022/07/11 11:33:17 mpi Exp $	*/
/*	$NetBSD: uvm_aobj.c,v 1.39 2001/02/18 21:19:08 chs Exp $	*/

/*
 * Copyright (c) 1998 Chuck Silvers, Charles D. Cranor and
 *                    Washington University.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * from: Id: uvm_aobj.c,v 1.1.2.5 1998/02/06 05:14:38 chs Exp
 */
/*
 * uvm_aobj.c: anonymous memory uvm_object pager
 *
 * author: Chuck Silvers <chuq@chuq.com>
 * started: Jan-1998
 *
 * - design mostly from Chuck Cranor
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/pool.h>
#include <sys/stdint.h>
#include <sys/atomic.h>

#include <uvm/uvm.h>

/*
 * An anonymous UVM object (aobj) manages anonymous memory.  In addition to
 * keeping the list of resident pages, it may also keep a list of allocated
 * swap blocks.  Depending on the size of the object, this list is either
 * stored in an array (small objects) or in a hash table (large objects).
 */

/*
 * Note: for hash tables, we break the address space of the aobj into blocks
 * of UAO_SWHASH_CLUSTER_SIZE pages, which shall be a power of two.
 */
#define	UAO_SWHASH_CLUSTER_SHIFT	4
#define	UAO_SWHASH_CLUSTER_SIZE		(1 << UAO_SWHASH_CLUSTER_SHIFT)

/* Get the "tag" for this page index. */
#define	UAO_SWHASH_ELT_TAG(idx)		((idx) >> UAO_SWHASH_CLUSTER_SHIFT)
#define	UAO_SWHASH_ELT_PAGESLOT_IDX(idx) \
    ((idx) & (UAO_SWHASH_CLUSTER_SIZE - 1))

/* Given an ELT and a page index, find the swap slot. */
#define	UAO_SWHASH_ELT_PAGESLOT(elt, idx) \
    ((elt)->slots[UAO_SWHASH_ELT_PAGESLOT_IDX(idx)])

/* Given an ELT, return its pageidx base. */
#define	UAO_SWHASH_ELT_PAGEIDX_BASE(elt) \
    ((elt)->tag << UAO_SWHASH_CLUSTER_SHIFT)

/* The hash function. */
#define	UAO_SWHASH_HASH(aobj, idx) \
    (&(aobj)->u_swhash[(((idx) >> UAO_SWHASH_CLUSTER_SHIFT) \
    & (aobj)->u_swhashmask)])

/*
 * The threshold which determines whether we will use an array or a
 * hash table to store the list of allocated swap blocks.
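 * With UAO_SWHASH_CLUSTER_SHIFT of 4 this works out to 64 pages: objects
 * of at most 64 pages keep the plain slot array, larger ones use the hash
 * table.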
 */
#define	UAO_SWHASH_THRESHOLD		(UAO_SWHASH_CLUSTER_SIZE * 4)
#define	UAO_USES_SWHASH(aobj) \
    ((aobj)->u_pages > UAO_SWHASH_THRESHOLD)

/* The number of buckets in a hash, with an upper bound. */
#define	UAO_SWHASH_MAXBUCKETS		256
#define	UAO_SWHASH_BUCKETS(pages) \
    (min((pages) >> UAO_SWHASH_CLUSTER_SHIFT, UAO_SWHASH_MAXBUCKETS))


/*
 * uao_swhash_elt: when a hash table is being used, this structure defines
 * the format of an entry in the bucket list.
 */
struct uao_swhash_elt {
	LIST_ENTRY(uao_swhash_elt) list;	/* the hash list */
	voff_t tag;				/* our 'tag' */
	int count;				/* our number of active slots */
	int slots[UAO_SWHASH_CLUSTER_SIZE];	/* the slots */
};

/*
 * uao_swhash: the swap hash table structure
 */
LIST_HEAD(uao_swhash, uao_swhash_elt);

/*
 * uao_swhash_elt_pool: pool of uao_swhash_elt structures
 */
struct pool uao_swhash_elt_pool;

/*
 * uvm_aobj: the actual anon-backed uvm_object
 *
 * => the uvm_object is at the top of the structure, this allows
 *    (struct uvm_aobj *) == (struct uvm_object *)
 * => only one of u_swslots and u_swhash is used in any given aobj
 */
struct uvm_aobj {
	struct uvm_object u_obj; /* has: pgops, memt, #pages, #refs */
	int u_pages;		 /* number of pages in entire object */
	int u_flags;		 /* the flags (see uvm_aobj.h) */
	/*
	 * Either an array or hashtable (array of bucket heads) of
	 * offset -> swapslot mappings for the aobj.
	 */
#define	u_swslots	u_swap.slot_array
#define	u_swhash	u_swap.slot_hash
	union swslots {
		int			*slot_array;
		struct uao_swhash	*slot_hash;
	} u_swap;
	u_long u_swhashmask;		/* mask for hashtable */
	LIST_ENTRY(uvm_aobj) u_list;	/* global list of aobjs */
};

struct pool uvm_aobj_pool;

static struct uao_swhash_elt	*uao_find_swhash_elt(struct uvm_aobj *, int,
				     boolean_t);
static boolean_t		 uao_flush(struct uvm_object *, voff_t,
				     voff_t, int);
static void			 uao_free(struct uvm_aobj *);
static int			 uao_get(struct uvm_object *, voff_t,
				     vm_page_t *, int *, int, vm_prot_t,
				     int, int);
static boolean_t		 uao_pagein(struct uvm_aobj *, int, int);
static boolean_t		 uao_pagein_page(struct uvm_aobj *, int);

void	uao_dropswap_range(struct uvm_object *, voff_t, voff_t);
void	uao_shrink_flush(struct uvm_object *, int, int);
int	uao_shrink_hash(struct uvm_object *, int);
int	uao_shrink_array(struct uvm_object *, int);
int	uao_shrink_convert(struct uvm_object *, int);

int	uao_grow_hash(struct uvm_object *, int);
int	uao_grow_array(struct uvm_object *, int);
int	uao_grow_convert(struct uvm_object *, int);

/*
 * aobj_pager
 *
 * note that some functions (e.g. put) are handled elsewhere
 */
const struct uvm_pagerops aobj_pager = {
	.pgo_reference = uao_reference,
	.pgo_detach = uao_detach,
	.pgo_flush = uao_flush,
	.pgo_get = uao_get,
};

/*
 * uao_list: global list of active aobjs, locked by uao_list_lock
 *
 * Lock ordering: generally the locking order is object lock, then list lock.
 * In the case of swap off we have to iterate over the list, and thus the
 * ordering is reversed.  In that case we must use trylocking to prevent
 * deadlock.
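 * (uao_swap_off() below is the swap-off path that iterates over this list.)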
 */
static LIST_HEAD(aobjlist, uvm_aobj) uao_list = LIST_HEAD_INITIALIZER(uao_list);
static struct mutex uao_list_lock = MUTEX_INITIALIZER(IPL_MPFLOOR);


/*
 * functions
 */
/*
 * hash table/array related functions
 */
/*
 * uao_find_swhash_elt: find (or create) a hash table entry for a page
 * offset.
 */
static struct uao_swhash_elt *
uao_find_swhash_elt(struct uvm_aobj *aobj, int pageidx, boolean_t create)
{
	struct uao_swhash *swhash;
	struct uao_swhash_elt *elt;
	voff_t page_tag;

	swhash = UAO_SWHASH_HASH(aobj, pageidx); /* first hash to get bucket */
	page_tag = UAO_SWHASH_ELT_TAG(pageidx);	 /* tag to search for */

	/*
	 * now search the bucket for the requested tag
	 */
	LIST_FOREACH(elt, swhash, list) {
		if (elt->tag == page_tag)
			return elt;
	}

	if (!create)
		return NULL;

	/*
	 * allocate a new entry for the bucket and init/insert it in
	 */
	elt = pool_get(&uao_swhash_elt_pool, PR_NOWAIT | PR_ZERO);
	/*
	 * XXX We cannot sleep here as the hash table might disappear
	 * from under our feet.  And we run the risk of deadlocking
	 * the pagedaemon.  In fact this code will only be called by
	 * the pagedaemon and allocation will only fail if we
	 * exhausted the pagedaemon reserve.  In that case we're
	 * doomed anyway, so panic.
	 */
	if (elt == NULL)
		panic("%s: can't allocate entry", __func__);
	LIST_INSERT_HEAD(swhash, elt, list);
	elt->tag = page_tag;

	return elt;
}

/*
 * uao_find_swslot: find the swap slot number for an aobj/pageidx
 */
int
uao_find_swslot(struct uvm_object *uobj, int pageidx)
{
	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;

	KASSERT(UVM_OBJ_IS_AOBJ(uobj));

	/*
	 * if noswap flag is set, then we never return a slot
	 */
	if (aobj->u_flags & UAO_FLAG_NOSWAP)
		return 0;

	/*
	 * if hashing, look in hash table.
	 */
	if (UAO_USES_SWHASH(aobj)) {
		struct uao_swhash_elt *elt =
		    uao_find_swhash_elt(aobj, pageidx, FALSE);

		if (elt)
			return UAO_SWHASH_ELT_PAGESLOT(elt, pageidx);
		else
			return 0;
	}

	/*
	 * otherwise, look in the array
	 */
	return aobj->u_swslots[pageidx];
}

/*
 * uao_set_swslot: set the swap slot for a page in an aobj.
 *
 * => setting a slot to zero frees the slot
 * => object must be locked by caller
 * => we return the old slot number, or -1 if we failed to allocate
 *    memory to record the new slot number
 */
int
uao_set_swslot(struct uvm_object *uobj, int pageidx, int slot)
{
	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
	int oldslot;

	KASSERT(rw_write_held(uobj->vmobjlock) || uobj->uo_refs == 0);
	KASSERT(UVM_OBJ_IS_AOBJ(uobj));

	/*
	 * if noswap flag is set, then we can't set a slot
	 */
	if (aobj->u_flags & UAO_FLAG_NOSWAP) {
		if (slot == 0)
			return 0;		/* a clear is ok */

		/* but a set is not */
		printf("uao_set_swslot: uobj = %p\n", uobj);
		panic("uao_set_swslot: attempt to set a slot on a NOSWAP object");
	}

	/*
	 * are we using a hash table?  if so, add it in the hash.
	 */
	if (UAO_USES_SWHASH(aobj)) {
		/*
		 * Avoid allocating an entry just to free it again if
		 * the page had no swap slot in the first place, and
		 * we are freeing.
		 */
		struct uao_swhash_elt *elt =
		    uao_find_swhash_elt(aobj, pageidx, slot ?
		        TRUE : FALSE);
		if (elt == NULL) {
			KASSERT(slot == 0);
			return 0;
		}

		oldslot = UAO_SWHASH_ELT_PAGESLOT(elt, pageidx);
		UAO_SWHASH_ELT_PAGESLOT(elt, pageidx) = slot;

		/*
		 * now adjust the elt's reference counter and free it if we've
		 * dropped it to zero.
		 */
		if (slot) {
			if (oldslot == 0)
				elt->count++;
		} else {
			if (oldslot)
				elt->count--;

			if (elt->count == 0) {
				LIST_REMOVE(elt, list);
				pool_put(&uao_swhash_elt_pool, elt);
			}
		}
	} else {
		/* we are using an array */
		oldslot = aobj->u_swslots[pageidx];
		aobj->u_swslots[pageidx] = slot;
	}
	return oldslot;
}
/*
 * end of hash/array functions
 */

/*
 * uao_free: free all resources held by an aobj, and then free the aobj
 *
 * => the aobj should be dead
 */
static void
uao_free(struct uvm_aobj *aobj)
{
	struct uvm_object *uobj = &aobj->u_obj;

	KASSERT(UVM_OBJ_IS_AOBJ(uobj));
	KASSERT(rw_write_held(uobj->vmobjlock));
	uao_dropswap_range(uobj, 0, 0);
	rw_exit(uobj->vmobjlock);

	if (UAO_USES_SWHASH(aobj)) {
		/*
		 * free the hash table itself.
		 */
		hashfree(aobj->u_swhash, UAO_SWHASH_BUCKETS(aobj->u_pages), M_UVMAOBJ);
	} else {
		free(aobj->u_swslots, M_UVMAOBJ, aobj->u_pages * sizeof(int));
	}

	/*
	 * finally free the aobj itself
	 */
	uvm_obj_destroy(uobj);
	pool_put(&uvm_aobj_pool, aobj);
}

/*
 * pager functions
 */

#ifdef TMPFS
/*
 * Shrink an aobj to a given number of pages.  The procedure is always the
 * same: assess the necessity of data structure conversion (hash to array),
 * secure resources, flush pages and drop swap slots.
 */

void
uao_shrink_flush(struct uvm_object *uobj, int startpg, int endpg)
{
	KASSERT(startpg < endpg);
	KASSERT(uobj->uo_refs == 1);
	uao_flush(uobj, (voff_t)startpg << PAGE_SHIFT,
	    (voff_t)endpg << PAGE_SHIFT, PGO_FREE);
	uao_dropswap_range(uobj, startpg, endpg);
}

int
uao_shrink_hash(struct uvm_object *uobj, int pages)
{
	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
	struct uao_swhash *new_swhash;
	struct uao_swhash_elt *elt;
	unsigned long new_hashmask;
	int i;

	KASSERT(UAO_USES_SWHASH(aobj));

	/*
	 * If the size of the hash table doesn't change, all we need to do is
	 * to adjust the page count.
	 */
	if (UAO_SWHASH_BUCKETS(aobj->u_pages) == UAO_SWHASH_BUCKETS(pages)) {
		uao_shrink_flush(uobj, pages, aobj->u_pages);
		aobj->u_pages = pages;
		return 0;
	}

	new_swhash = hashinit(UAO_SWHASH_BUCKETS(pages), M_UVMAOBJ,
	    M_WAITOK | M_CANFAIL, &new_hashmask);
	if (new_swhash == NULL)
		return ENOMEM;

	uao_shrink_flush(uobj, pages, aobj->u_pages);

	/*
	 * Even though the hash table size is changing, the hash of the buckets
	 * we are interested in copying should not change.
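	 * (After the flush above, only slots below the new page count remain,
	 * and those hash to the same bucket index in both the old and the
	 * new table.)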
	 */
	for (i = 0; i < UAO_SWHASH_BUCKETS(aobj->u_pages); i++) {
		while (LIST_EMPTY(&aobj->u_swhash[i]) == 0) {
			elt = LIST_FIRST(&aobj->u_swhash[i]);
			LIST_REMOVE(elt, list);
			LIST_INSERT_HEAD(&new_swhash[i], elt, list);
		}
	}

	hashfree(aobj->u_swhash, UAO_SWHASH_BUCKETS(aobj->u_pages), M_UVMAOBJ);

	aobj->u_swhash = new_swhash;
	aobj->u_pages = pages;
	aobj->u_swhashmask = new_hashmask;

	return 0;
}

int
uao_shrink_convert(struct uvm_object *uobj, int pages)
{
	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
	struct uao_swhash_elt *elt;
	int i, *new_swslots;

	new_swslots = mallocarray(pages, sizeof(int), M_UVMAOBJ,
	    M_WAITOK | M_CANFAIL | M_ZERO);
	if (new_swslots == NULL)
		return ENOMEM;

	uao_shrink_flush(uobj, pages, aobj->u_pages);

	/* Convert swap slots from hash to array. */
	for (i = 0; i < pages; i++) {
		elt = uao_find_swhash_elt(aobj, i, FALSE);
		if (elt != NULL) {
			new_swslots[i] = UAO_SWHASH_ELT_PAGESLOT(elt, i);
			if (new_swslots[i] != 0)
				elt->count--;
			if (elt->count == 0) {
				LIST_REMOVE(elt, list);
				pool_put(&uao_swhash_elt_pool, elt);
			}
		}
	}

	hashfree(aobj->u_swhash, UAO_SWHASH_BUCKETS(aobj->u_pages), M_UVMAOBJ);

	aobj->u_swslots = new_swslots;
	aobj->u_pages = pages;

	return 0;
}

int
uao_shrink_array(struct uvm_object *uobj, int pages)
{
	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
	int i, *new_swslots;

	new_swslots = mallocarray(pages, sizeof(int), M_UVMAOBJ,
	    M_WAITOK | M_CANFAIL | M_ZERO);
	if (new_swslots == NULL)
		return ENOMEM;

	uao_shrink_flush(uobj, pages, aobj->u_pages);

	for (i = 0; i < pages; i++)
		new_swslots[i] = aobj->u_swslots[i];

	free(aobj->u_swslots, M_UVMAOBJ, aobj->u_pages * sizeof(int));

	aobj->u_swslots = new_swslots;
	aobj->u_pages = pages;

	return 0;
}

int
uao_shrink(struct uvm_object *uobj, int pages)
{
	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;

	KASSERT(pages < aobj->u_pages);

	/*
	 * Distinguish between three possible cases:
	 * 1. aobj uses hash and must be converted to array.
	 * 2. aobj uses array and array size needs to be adjusted.
	 * 3. aobj uses hash and hash size needs to be adjusted.
	 */
	if (pages > UAO_SWHASH_THRESHOLD)
		return uao_shrink_hash(uobj, pages);	/* case 3 */
	else if (aobj->u_pages > UAO_SWHASH_THRESHOLD)
		return uao_shrink_convert(uobj, pages);	/* case 1 */
	else
		return uao_shrink_array(uobj, pages);	/* case 2 */
}

/*
 * Grow an aobj to a given number of pages.  Right now we only adjust the
 * swap slots.  We could additionally handle page allocation directly, so
 * that it does not happen through uvm_fault().  That would allow us to use
 * another mechanism for the swap slots other than malloc().  The caller of
 * these functions must therefore not allow faults to happen if growth
 * fails.
 */
int
uao_grow_array(struct uvm_object *uobj, int pages)
{
	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
	int i, *new_swslots;

	KASSERT(aobj->u_pages <= UAO_SWHASH_THRESHOLD);

	new_swslots = mallocarray(pages, sizeof(int), M_UVMAOBJ,
	    M_WAITOK | M_CANFAIL | M_ZERO);
	if (new_swslots == NULL)
		return ENOMEM;

	for (i = 0; i < aobj->u_pages; i++)
		new_swslots[i] = aobj->u_swslots[i];

	free(aobj->u_swslots, M_UVMAOBJ, aobj->u_pages * sizeof(int));

	aobj->u_swslots = new_swslots;
	aobj->u_pages = pages;

	return 0;
}

int
uao_grow_hash(struct uvm_object *uobj, int pages)
{
	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
	struct uao_swhash *new_swhash;
	struct uao_swhash_elt *elt;
	unsigned long new_hashmask;
	int i;

	KASSERT(pages > UAO_SWHASH_THRESHOLD);

	/*
	 * If the size of the hash table doesn't change, all we need to do is
	 * to adjust the page count.
	 */
	if (UAO_SWHASH_BUCKETS(aobj->u_pages) == UAO_SWHASH_BUCKETS(pages)) {
		aobj->u_pages = pages;
		return 0;
	}

	KASSERT(UAO_SWHASH_BUCKETS(aobj->u_pages) < UAO_SWHASH_BUCKETS(pages));

	new_swhash = hashinit(UAO_SWHASH_BUCKETS(pages), M_UVMAOBJ,
	    M_WAITOK | M_CANFAIL, &new_hashmask);
	if (new_swhash == NULL)
		return ENOMEM;

	for (i = 0; i < UAO_SWHASH_BUCKETS(aobj->u_pages); i++) {
		while (LIST_EMPTY(&aobj->u_swhash[i]) == 0) {
			elt = LIST_FIRST(&aobj->u_swhash[i]);
			LIST_REMOVE(elt, list);
			LIST_INSERT_HEAD(&new_swhash[i], elt, list);
		}
	}

	hashfree(aobj->u_swhash, UAO_SWHASH_BUCKETS(aobj->u_pages), M_UVMAOBJ);

	aobj->u_swhash = new_swhash;
	aobj->u_pages = pages;
	aobj->u_swhashmask = new_hashmask;

	return 0;
}

int
uao_grow_convert(struct uvm_object *uobj, int pages)
{
	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
	struct uao_swhash *new_swhash;
	struct uao_swhash_elt *elt;
	unsigned long new_hashmask;
	int i, *old_swslots;

	new_swhash = hashinit(UAO_SWHASH_BUCKETS(pages), M_UVMAOBJ,
	    M_WAITOK | M_CANFAIL, &new_hashmask);
	if (new_swhash == NULL)
		return ENOMEM;

	/* Set these now, so we can use uao_find_swhash_elt(). */
	old_swslots = aobj->u_swslots;
	aobj->u_swhash = new_swhash;
	aobj->u_swhashmask = new_hashmask;

	for (i = 0; i < aobj->u_pages; i++) {
		if (old_swslots[i] != 0) {
			elt = uao_find_swhash_elt(aobj, i, TRUE);
			elt->count++;
			UAO_SWHASH_ELT_PAGESLOT(elt, i) = old_swslots[i];
		}
	}

	free(old_swslots, M_UVMAOBJ, aobj->u_pages * sizeof(int));
	aobj->u_pages = pages;

	return 0;
}

int
uao_grow(struct uvm_object *uobj, int pages)
{
	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;

	KASSERT(pages > aobj->u_pages);

	/*
	 * Distinguish between three possible cases:
	 * 1. aobj uses hash and hash size needs to be adjusted.
	 * 2. aobj uses array and array size needs to be adjusted.
	 * 3. aobj uses array and must be converted to hash.
	 */
	if (pages <= UAO_SWHASH_THRESHOLD)
		return uao_grow_array(uobj, pages);	/* case 2 */
	else if (aobj->u_pages > UAO_SWHASH_THRESHOLD)
		return uao_grow_hash(uobj, pages);	/* case 1 */
	else
		return uao_grow_convert(uobj, pages);
}
#endif /* TMPFS */

/*
 * uao_create: create an aobj of the given size and return its uvm_object.
 *
 * => for normal use, flags are zero or UAO_FLAG_CANFAIL.
 * => for the kernel object, the flags are:
 *	UAO_FLAG_KERNOBJ - allocate the kernel object (can only happen once)
 *	UAO_FLAG_KERNSWAP - enable swapping of kernel object (" ")
 */
struct uvm_object *
uao_create(vsize_t size, int flags)
{
	static struct uvm_aobj kernel_object_store;
	static struct rwlock bootstrap_kernel_object_lock;
	static int kobj_alloced = 0;
	int pages = round_page(size) >> PAGE_SHIFT;
	struct uvm_aobj *aobj;
	int refs;

	/*
	 * Allocate a new aobj, unless kernel object is requested.
	 */
	if (flags & UAO_FLAG_KERNOBJ) {
		KASSERT(!kobj_alloced);
		aobj = &kernel_object_store;
		aobj->u_pages = pages;
		aobj->u_flags = UAO_FLAG_NOSWAP;
		refs = UVM_OBJ_KERN;
		kobj_alloced = UAO_FLAG_KERNOBJ;
	} else if (flags & UAO_FLAG_KERNSWAP) {
		KASSERT(kobj_alloced == UAO_FLAG_KERNOBJ);
		aobj = &kernel_object_store;
		kobj_alloced = UAO_FLAG_KERNSWAP;
	} else {
		aobj = pool_get(&uvm_aobj_pool, PR_WAITOK);
		aobj->u_pages = pages;
		aobj->u_flags = 0;
		refs = 1;
	}

	/*
	 * allocate hash/array if necessary
	 */
	if (flags == 0 || (flags & (UAO_FLAG_KERNSWAP | UAO_FLAG_CANFAIL))) {
		int mflags;

		if (flags)
			mflags = M_NOWAIT;
		else
			mflags = M_WAITOK;

		/* allocate hash table or array depending on object size */
		if (UAO_USES_SWHASH(aobj)) {
			aobj->u_swhash = hashinit(UAO_SWHASH_BUCKETS(pages),
			    M_UVMAOBJ, mflags, &aobj->u_swhashmask);
			if (aobj->u_swhash == NULL) {
				if (flags & UAO_FLAG_CANFAIL) {
					pool_put(&uvm_aobj_pool, aobj);
					return NULL;
				}
				panic("uao_create: hashinit swhash failed");
			}
		} else {
			aobj->u_swslots = mallocarray(pages, sizeof(int),
			    M_UVMAOBJ, mflags|M_ZERO);
			if (aobj->u_swslots == NULL) {
				if (flags & UAO_FLAG_CANFAIL) {
					pool_put(&uvm_aobj_pool, aobj);
					return NULL;
				}
				panic("uao_create: malloc swslots failed");
			}
		}

		if (flags & UAO_FLAG_KERNSWAP) {
			aobj->u_flags &= ~UAO_FLAG_NOSWAP; /* clear noswap */
			return &aobj->u_obj;
			/* done! */
		}
	}

	/*
	 * Initialise UVM object.
	 */
	uvm_obj_init(&aobj->u_obj, &aobj_pager, refs);
	if (flags & UAO_FLAG_KERNOBJ) {
		/* Use a temporary static lock for kernel_object. */
		rw_init(&bootstrap_kernel_object_lock, "kobjlk");
		uvm_obj_setlock(&aobj->u_obj, &bootstrap_kernel_object_lock);
	}

	/*
	 * now that aobj is ready, add it to the global list
	 */
	mtx_enter(&uao_list_lock);
	LIST_INSERT_HEAD(&uao_list, aobj, u_list);
	mtx_leave(&uao_list_lock);

	return &aobj->u_obj;
}


/*
 * uao_init: set up aobj pager subsystem
 *
 * => called at boot time from uvm_pager_init()
 */
void
uao_init(void)
{
	/*
	 * NOTE: Pages for this pool must not come from a pageable
	 * kernel map!
	 */
	pool_init(&uao_swhash_elt_pool, sizeof(struct uao_swhash_elt), 0,
	    IPL_NONE, PR_WAITOK, "uaoeltpl", NULL);
	pool_init(&uvm_aobj_pool, sizeof(struct uvm_aobj), 0,
	    IPL_NONE, PR_WAITOK, "aobjpl", NULL);
}

/*
 * uao_reference: hold a reference to an anonymous UVM object.
 */
void
uao_reference(struct uvm_object *uobj)
{
	/* Kernel object is persistent.
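	 * Its reference count is never adjusted; uao_detach() ignores it
	 * as well.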
	 */
	if (UVM_OBJ_IS_KERN_OBJECT(uobj))
		return;

	atomic_inc_int(&uobj->uo_refs);
}


/*
 * uao_detach: drop a reference to an anonymous UVM object.
 */
void
uao_detach(struct uvm_object *uobj)
{
	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
	struct vm_page *pg;

	/*
	 * Detaching from kernel_object is a NOP.
	 */
	if (UVM_OBJ_IS_KERN_OBJECT(uobj))
		return;

	/*
	 * Drop the reference.  If it was the last one, destroy the object.
	 */
	if (atomic_dec_int_nv(&uobj->uo_refs) > 0) {
		return;
	}

	/*
	 * Remove the aobj from the global list.
	 */
	mtx_enter(&uao_list_lock);
	LIST_REMOVE(aobj, u_list);
	mtx_leave(&uao_list_lock);

	/*
	 * Free all the pages left in the aobj.  For each page, when the
	 * page is no longer busy (and thus after any disk I/O that it is
	 * involved in is complete), release any swap resources and free
	 * the page itself.
	 */
	rw_enter(uobj->vmobjlock, RW_WRITE);
	while ((pg = RBT_ROOT(uvm_objtree, &uobj->memt)) != NULL) {
		pmap_page_protect(pg, PROT_NONE);
		if (pg->pg_flags & PG_BUSY) {
			atomic_setbits_int(&pg->pg_flags, PG_WANTED);
			rwsleep_nsec(pg, uobj->vmobjlock, PVM, "uao_det",
			    INFSLP);
			continue;
		}
		uao_dropswap(&aobj->u_obj, pg->offset >> PAGE_SHIFT);
		uvm_lock_pageq();
		uvm_pagefree(pg);
		uvm_unlock_pageq();
	}

	/*
	 * Finally, free the anonymous UVM object itself.
	 */
	uao_free(aobj);
}

/*
 * uao_flush: flush pages out of a uvm object
 *
 * => if PGO_CLEANIT is not set, then we will not block.
 * => if PGO_ALLPAGES is set, then all pages in the object are valid targets
 *    for flushing.
 * => NOTE: we are allowed to lock the page queues, so the caller
 *    must not be holding the lock on them [e.g. pagedaemon had
 *    better not call us with the queues locked]
 * => we return TRUE unless we encountered some sort of I/O error
 *	XXXJRT currently never happens, as we never directly initiate
 *	XXXJRT I/O
 */
boolean_t
uao_flush(struct uvm_object *uobj, voff_t start, voff_t stop, int flags)
{
	struct uvm_aobj *aobj = (struct uvm_aobj *) uobj;
	struct vm_page *pg;
	voff_t curoff;

	KASSERT(UVM_OBJ_IS_AOBJ(uobj));
	KASSERT(rw_write_held(uobj->vmobjlock));

	if (flags & PGO_ALLPAGES) {
		start = 0;
		stop = (voff_t)aobj->u_pages << PAGE_SHIFT;
	} else {
		start = trunc_page(start);
		stop = round_page(stop);
		if (stop > ((voff_t)aobj->u_pages << PAGE_SHIFT)) {
			printf("uao_flush: strange, got an out of range "
			    "flush (fixed)\n");
			stop = (voff_t)aobj->u_pages << PAGE_SHIFT;
		}
	}

	/*
	 * Don't need to do any work here if we're not freeing
	 * or deactivating pages.
	 */
	if ((flags & (PGO_DEACTIVATE|PGO_FREE)) == 0) {
		return TRUE;
	}

	curoff = start;
	for (;;) {
		if (curoff < stop) {
			pg = uvm_pagelookup(uobj, curoff);
			curoff += PAGE_SIZE;
			if (pg == NULL)
				continue;
		} else {
			break;
		}

		/* Make sure page is unbusy, else wait for it. */
		if (pg->pg_flags & PG_BUSY) {
			atomic_setbits_int(&pg->pg_flags, PG_WANTED);
			rwsleep_nsec(pg, uobj->vmobjlock, PVM, "uaoflsh",
			    INFSLP);
			curoff -= PAGE_SIZE;
			continue;
		}

		switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) {
		/*
		 * XXX In these first 3 cases, we always just
		 * XXX deactivate the page.
		 * XXX We may want to
		 * XXX handle the different cases more specifically
		 * XXX in the future.
		 */
		case PGO_CLEANIT|PGO_FREE:
			/* FALLTHROUGH */
		case PGO_CLEANIT|PGO_DEACTIVATE:
			/* FALLTHROUGH */
		case PGO_DEACTIVATE:
 deactivate_it:
			if (pg->wire_count != 0)
				continue;

			uvm_lock_pageq();
			pmap_page_protect(pg, PROT_NONE);
			uvm_pagedeactivate(pg);
			uvm_unlock_pageq();

			continue;
		case PGO_FREE:
			/*
			 * If there are multiple references to
			 * the object, just deactivate the page.
			 */
			if (uobj->uo_refs > 1)
				goto deactivate_it;

			/* XXX skip the page if it's wired */
			if (pg->wire_count != 0)
				continue;

			/*
			 * free the swap slot and the page.
			 */
			pmap_page_protect(pg, PROT_NONE);

			/*
			 * freeing swapslot here is not strictly necessary.
			 * however, leaving it here doesn't save much
			 * because we need to update swap accounting anyway.
			 */
			uao_dropswap(uobj, pg->offset >> PAGE_SHIFT);
			uvm_lock_pageq();
			uvm_pagefree(pg);
			uvm_unlock_pageq();

			continue;
		default:
			panic("uao_flush: weird flags");
		}
	}

	return TRUE;
}

/*
 * uao_get: fetch me a page
 *
 * we have three cases:
 * 1: page is resident     -> just return the page.
 * 2: page is zero-fill    -> allocate a new page and zero it.
 * 3: page is swapped out  -> fetch the page from swap.
 *
 * case 1 can be handled with PGO_LOCKED, cases 2 and 3 cannot.
 * so, if the "center" page hits case 3 (or any page, with PGO_ALLPAGES),
 * then we will need to return VM_PAGER_UNLOCK.
 *
 * => flags: PGO_ALLPAGES: get all of the pages
 *           PGO_LOCKED: fault data structures are locked
 * => NOTE: offset is the offset of pps[0], _NOT_ pps[centeridx]
 * => NOTE: caller must check for released pages!!
 */
static int
uao_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps,
    int *npagesp, int centeridx, vm_prot_t access_type, int advice, int flags)
{
	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
	voff_t current_offset;
	vm_page_t ptmp;
	int lcv, gotpages, maxpages, swslot, rv, pageidx;
	boolean_t done;

	KASSERT(UVM_OBJ_IS_AOBJ(uobj));
	KASSERT(rw_write_held(uobj->vmobjlock));

	/*
	 * get number of pages
	 */
	maxpages = *npagesp;

	if (flags & PGO_LOCKED) {
		/*
		 * step 1a: get pages that are already resident.   only do
		 * this if the data structures are locked (i.e. the first
		 * time through).
		 */

		done = TRUE;	/* be optimistic */
		gotpages = 0;	/* # of pages we got so far */

		for (lcv = 0, current_offset = offset ; lcv < maxpages ;
		    lcv++, current_offset += PAGE_SIZE) {
			/* do we care about this page?  if not, skip it */
			if (pps[lcv] == PGO_DONTCARE)
				continue;

			ptmp = uvm_pagelookup(uobj, current_offset);

			/*
			 * if page is new, attempt to allocate the page,
			 * zero-fill'd.
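			 * (uvm_pagealloc() does not sleep, so this is safe
			 * while we hold the lock in the PGO_LOCKED case; if
			 * the allocation fails the page is simply left to
			 * the unlocked path.)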
			 */
			if (ptmp == NULL && uao_find_swslot(uobj,
			    current_offset >> PAGE_SHIFT) == 0) {
				ptmp = uvm_pagealloc(uobj, current_offset,
				    NULL, UVM_PGA_ZERO);
				if (ptmp) {
					/* new page */
					atomic_clearbits_int(&ptmp->pg_flags,
					    PG_BUSY|PG_FAKE);
					atomic_setbits_int(&ptmp->pg_flags,
					    PQ_AOBJ);
					UVM_PAGE_OWN(ptmp, NULL);
				}
			}

			/*
			 * to be useful must get a non-busy page
			 */
			if (ptmp == NULL ||
			    (ptmp->pg_flags & PG_BUSY) != 0) {
				if (lcv == centeridx ||
				    (flags & PGO_ALLPAGES) != 0)
					/* need to do a wait or I/O! */
					done = FALSE;
				continue;
			}

			/*
			 * useful page: plug it in our result array
			 */
			atomic_setbits_int(&ptmp->pg_flags, PG_BUSY);
			UVM_PAGE_OWN(ptmp, "uao_get1");
			pps[lcv] = ptmp;
			gotpages++;

		}

		/*
		 * step 1b: now we've either done everything needed or we
		 * need to unlock and do some waiting or I/O.
		 */
		*npagesp = gotpages;
		if (done)
			/* bingo! */
			return VM_PAGER_OK;
		else
			/* EEK!   Need to unlock and I/O */
			return VM_PAGER_UNLOCK;
	}

	/*
	 * step 2: get non-resident or busy pages.
	 * data structures are unlocked.
	 */
	for (lcv = 0, current_offset = offset ; lcv < maxpages ;
	    lcv++, current_offset += PAGE_SIZE) {
		/*
		 * - skip over pages we've already gotten or don't want
		 * - skip over pages we don't _have_ to get
		 */
		if (pps[lcv] != NULL ||
		    (lcv != centeridx && (flags & PGO_ALLPAGES) == 0))
			continue;

		pageidx = current_offset >> PAGE_SHIFT;

		/*
		 * we have yet to locate the current page (pps[lcv]).   we
		 * first look for a page that is already at the current offset.
		 * if we find a page, we check to see if it is busy or
		 * released.  if that is the case, then we sleep on the page
		 * until it is no longer busy or released and repeat the lookup.
		 * if the page we found is neither busy nor released, then we
		 * busy it (so we own it) and plug it into pps[lcv].   this
		 * 'break's the following while loop and indicates we are
		 * ready to move on to the next page in the "lcv" loop above.
		 *
		 * if we exit the while loop with pps[lcv] still set to NULL,
		 * then it means that we allocated a new busy/fake/clean page
		 * ptmp in the object and we need to do I/O to fill in the data.
		 */

		/* top of "pps" while loop */
		while (pps[lcv] == NULL) {
			/* look for a resident page */
			ptmp = uvm_pagelookup(uobj, current_offset);

			/* not resident?   allocate one now (if we can) */
			if (ptmp == NULL) {

				ptmp = uvm_pagealloc(uobj, current_offset,
				    NULL, 0);

				/* out of RAM? */
				if (ptmp == NULL) {
					rw_exit(uobj->vmobjlock);
					uvm_wait("uao_getpage");
					rw_enter(uobj->vmobjlock, RW_WRITE);
					/* goto top of pps while loop */
					continue;
				}

				/*
				 * safe with PQ's unlocked: because we just
				 * alloc'd the page
				 */
				atomic_setbits_int(&ptmp->pg_flags, PQ_AOBJ);

				/*
				 * got new page ready for I/O.  break pps while
				 * loop.  pps[lcv] is still NULL.
				 */
				break;
			}

			/* page is there, see if we need to wait on it */
			if ((ptmp->pg_flags & PG_BUSY) != 0) {
				atomic_setbits_int(&ptmp->pg_flags, PG_WANTED);
				rwsleep_nsec(ptmp, uobj->vmobjlock, PVM,
				    "uao_get", INFSLP);
				continue;	/* goto top of pps while loop */
			}

			/*
			 * if we get here then the page is resident and
			 * unbusy.  we busy it now (so we own it).
			 */
			/* we own it, caller must un-busy */
			atomic_setbits_int(&ptmp->pg_flags, PG_BUSY);
			UVM_PAGE_OWN(ptmp, "uao_get2");
			pps[lcv] = ptmp;
		}

		/*
		 * if we own the valid page at the correct offset, pps[lcv] will
		 * point to it.   nothing more to do except go to the next page.
		 */
		if (pps[lcv])
			continue;			/* next lcv */

		/*
		 * we have a "fake/busy/clean" page that we just allocated.
		 * do the needed "i/o", either reading from swap or zeroing.
		 */
		swslot = uao_find_swslot(uobj, pageidx);

		/* just zero the page if there's nothing in swap. */
		if (swslot == 0) {
			/* page hasn't existed before, just zero it. */
			uvm_pagezero(ptmp);
		} else {
			/*
			 * page in the swapped-out page.
			 * unlock object for i/o, relock when done.
			 */

			rw_exit(uobj->vmobjlock);
			rv = uvm_swap_get(ptmp, swslot, PGO_SYNCIO);
			rw_enter(uobj->vmobjlock, RW_WRITE);

			/*
			 * I/O done.  check for errors.
			 */
			if (rv != VM_PAGER_OK) {
				/*
				 * remove the swap slot from the aobj
				 * and mark the aobj as having no real slot.
				 * don't free the swap slot, thus preventing
				 * it from being used again.
				 */
				swslot = uao_set_swslot(&aobj->u_obj, pageidx,
				    SWSLOT_BAD);
				uvm_swap_markbad(swslot, 1);

				if (ptmp->pg_flags & PG_WANTED)
					wakeup(ptmp);
				atomic_clearbits_int(&ptmp->pg_flags,
				    PG_WANTED|PG_BUSY);
				UVM_PAGE_OWN(ptmp, NULL);
				uvm_lock_pageq();
				uvm_pagefree(ptmp);
				uvm_unlock_pageq();
				rw_exit(uobj->vmobjlock);

				return rv;
			}
		}

		/*
		 * we got the page!   clear the fake flag (indicates valid
		 * data now in page) and plug into our result array.   note
		 * that page is still busy.
		 *
		 * it is the caller's job to:
		 * => check if the page is released
		 * => unbusy the page
		 * => activate the page
		 */
		atomic_clearbits_int(&ptmp->pg_flags, PG_FAKE);
		pmap_clear_modify(ptmp);		/* ... and clean */
		pps[lcv] = ptmp;

	}	/* lcv loop */

	rw_exit(uobj->vmobjlock);
	return VM_PAGER_OK;
}

/*
 * uao_dropswap: release any swap resources from this aobj page.
 *
 * => aobj must be locked or have a reference count of 0.
 */
int
uao_dropswap(struct uvm_object *uobj, int pageidx)
{
	int slot;

	KASSERT(UVM_OBJ_IS_AOBJ(uobj));

	slot = uao_set_swslot(uobj, pageidx, 0);
	if (slot) {
		uvm_swap_free(slot, 1);
	}
	return slot;
}

/*
 * page in every page in every aobj that is paged-out to a range of swslots.
 *
 * => aobj must be locked and is returned locked.
 * => returns TRUE if pagein was aborted due to lack of memory.
 */
boolean_t
uao_swap_off(int startslot, int endslot)
{
	struct uvm_aobj *aobj;

	/*
	 * Walk the list of all anonymous UVM objects.  Grab the first.
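	 * A reference is taken on each object before the list lock is
	 * dropped, so it cannot be freed while we operate on it.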
	 */
	mtx_enter(&uao_list_lock);
	if ((aobj = LIST_FIRST(&uao_list)) == NULL) {
		mtx_leave(&uao_list_lock);
		return FALSE;
	}
	uao_reference(&aobj->u_obj);

	do {
		struct uvm_aobj *nextaobj;
		boolean_t rv;

		/*
		 * Prefetch the next object and immediately hold a reference
		 * on it, so neither the current nor the next entry could
		 * disappear while we are iterating.
		 */
		if ((nextaobj = LIST_NEXT(aobj, u_list)) != NULL) {
			uao_reference(&nextaobj->u_obj);
		}
		mtx_leave(&uao_list_lock);

		/*
		 * Page in all pages in the swap slot range.
		 */
		rw_enter(aobj->u_obj.vmobjlock, RW_WRITE);
		rv = uao_pagein(aobj, startslot, endslot);
		rw_exit(aobj->u_obj.vmobjlock);

		/* Drop the reference of the current object. */
		uao_detach(&aobj->u_obj);
		if (rv) {
			if (nextaobj) {
				uao_detach(&nextaobj->u_obj);
			}
			return rv;
		}

		aobj = nextaobj;
		mtx_enter(&uao_list_lock);
	} while (aobj);

	/*
	 * done with traversal, unlock the list
	 */
	mtx_leave(&uao_list_lock);
	return FALSE;
}

/*
 * page in any pages from aobj in the given range.
 *
 * => returns TRUE if pagein was aborted due to lack of memory.
 */
static boolean_t
uao_pagein(struct uvm_aobj *aobj, int startslot, int endslot)
{
	boolean_t rv;

	if (UAO_USES_SWHASH(aobj)) {
		struct uao_swhash_elt *elt;
		int bucket;

restart:
		for (bucket = aobj->u_swhashmask; bucket >= 0; bucket--) {
			for (elt = LIST_FIRST(&aobj->u_swhash[bucket]);
			    elt != NULL;
			    elt = LIST_NEXT(elt, list)) {
				int i;

				for (i = 0; i < UAO_SWHASH_CLUSTER_SIZE; i++) {
					int slot = elt->slots[i];

					/*
					 * if the slot isn't in range, skip it.
					 */
					if (slot < startslot ||
					    slot >= endslot) {
						continue;
					}

					/*
					 * process the page,
					 * then start over on this object
					 * since the swhash elt
					 * may have been freed.
					 */
					rv = uao_pagein_page(aobj,
					    UAO_SWHASH_ELT_PAGEIDX_BASE(elt) + i);
					if (rv) {
						return rv;
					}
					goto restart;
				}
			}
		}
	} else {
		int i;

		for (i = 0; i < aobj->u_pages; i++) {
			int slot = aobj->u_swslots[i];

			/*
			 * if the slot isn't in range, skip it
			 */
			if (slot < startslot || slot >= endslot) {
				continue;
			}

			/*
			 * process the page.
			 */
			rv = uao_pagein_page(aobj, i);
			if (rv) {
				return rv;
			}
		}
	}

	return FALSE;
}

/*
 * uao_pagein_page: page in a single page from an anonymous UVM object.
 *
 * => Returns TRUE if pagein was aborted due to lack of memory.
 */
static boolean_t
uao_pagein_page(struct uvm_aobj *aobj, int pageidx)
{
	struct uvm_object *uobj = &aobj->u_obj;
	struct vm_page *pg;
	int rv, slot, npages;

	pg = NULL;
	npages = 1;

	KASSERT(rw_write_held(uobj->vmobjlock));
	rv = uao_get(&aobj->u_obj, (voff_t)pageidx << PAGE_SHIFT,
	    &pg, &npages, 0, PROT_READ | PROT_WRITE, 0, 0);

	/*
	 * relock and finish up.
	 */
	rw_enter(uobj->vmobjlock, RW_WRITE);
	switch (rv) {
	case VM_PAGER_OK:
		break;

	case VM_PAGER_ERROR:
	case VM_PAGER_REFAULT:
		/*
		 * nothing more to do on errors.
		 * VM_PAGER_REFAULT can only mean that the anon was freed,
		 * so again there's nothing to do.
		 */
		return FALSE;
	}

	/*
	 * ok, we've got the page now.
	 * mark it as dirty, clear its swslot and un-busy it.
	 */
	slot = uao_set_swslot(&aobj->u_obj, pageidx, 0);
	uvm_swap_free(slot, 1);
	atomic_clearbits_int(&pg->pg_flags, PG_BUSY|PG_CLEAN|PG_FAKE);
	UVM_PAGE_OWN(pg, NULL);

	/*
	 * deactivate the page (to put it on a page queue).
	 */
	pmap_clear_reference(pg);
	uvm_lock_pageq();
	uvm_pagedeactivate(pg);
	uvm_unlock_pageq();

	return FALSE;
}

/*
 * uao_dropswap_range: drop swapslots in the range.
 *
 * => aobj must be locked and is returned locked.
 * => start is inclusive.  end is exclusive.
 */
void
uao_dropswap_range(struct uvm_object *uobj, voff_t start, voff_t end)
{
	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
	int swpgonlydelta = 0;

	KASSERT(UVM_OBJ_IS_AOBJ(uobj));
	KASSERT(rw_write_held(uobj->vmobjlock));

	if (end == 0) {
		end = INT64_MAX;
	}

	if (UAO_USES_SWHASH(aobj)) {
		int i, hashbuckets = aobj->u_swhashmask + 1;
		voff_t taghi;
		voff_t taglo;

		taglo = UAO_SWHASH_ELT_TAG(start);
		taghi = UAO_SWHASH_ELT_TAG(end);

		for (i = 0; i < hashbuckets; i++) {
			struct uao_swhash_elt *elt, *next;

			for (elt = LIST_FIRST(&aobj->u_swhash[i]);
			    elt != NULL;
			    elt = next) {
				int startidx, endidx;
				int j;

				next = LIST_NEXT(elt, list);

				if (elt->tag < taglo || taghi < elt->tag) {
					continue;
				}

				if (elt->tag == taglo) {
					startidx =
					    UAO_SWHASH_ELT_PAGESLOT_IDX(start);
				} else {
					startidx = 0;
				}

				if (elt->tag == taghi) {
					endidx =
					    UAO_SWHASH_ELT_PAGESLOT_IDX(end);
				} else {
					endidx = UAO_SWHASH_CLUSTER_SIZE;
				}

				for (j = startidx; j < endidx; j++) {
					int slot = elt->slots[j];

					KASSERT(uvm_pagelookup(&aobj->u_obj,
					    (voff_t)(UAO_SWHASH_ELT_PAGEIDX_BASE(elt)
					    + j) << PAGE_SHIFT) == NULL);

					if (slot > 0) {
						uvm_swap_free(slot, 1);
						swpgonlydelta++;
						KASSERT(elt->count > 0);
						elt->slots[j] = 0;
						elt->count--;
					}
				}

				if (elt->count == 0) {
					LIST_REMOVE(elt, list);
					pool_put(&uao_swhash_elt_pool, elt);
				}
			}
		}
	} else {
		int i;

		if (aobj->u_pages < end) {
			end = aobj->u_pages;
		}
		for (i = start; i < end; i++) {
			int slot = aobj->u_swslots[i];

			if (slot > 0) {
				uvm_swap_free(slot, 1);
				swpgonlydelta++;
			}
		}
	}

	/*
	 * adjust the counter of pages only in swap for all
	 * the swap slots we've freed.
	 */
	if (swpgonlydelta > 0) {
		KASSERT(uvmexp.swpgonly >= swpgonlydelta);
		atomic_add_int(&uvmexp.swpgonly, -swpgonlydelta);
	}
}