1 /* 2 * Copyright (c) 1990 University of Utah. 3 * Copyright (c) 1991, 1993 4 * The Regents of the University of California. All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * the Systems Programming Group of the University of Utah Computer 8 * Science Department. 9 * 10 * %sccs.include.redist.c% 11 * 12 * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$ 13 * 14 * @(#)swap_pager.c 8.6 (Berkeley) 01/12/94 15 */ 16 17 /* 18 * Quick hack to page to dedicated partition(s). 19 * TODO: 20 * Add multiprocessor locks 21 * Deal with async writes in a better fashion 22 */ 23 24 #include <sys/param.h> 25 #include <sys/systm.h> 26 #include <sys/proc.h> 27 #include <sys/buf.h> 28 #include <sys/map.h> 29 #include <sys/vnode.h> 30 #include <sys/malloc.h> 31 32 #include <miscfs/specfs/specdev.h> 33 34 #include <vm/vm.h> 35 #include <vm/vm_page.h> 36 #include <vm/vm_pageout.h> 37 #include <vm/swap_pager.h> 38 39 #define NSWSIZES 16 /* size of swtab */ 40 #define MAXDADDRS 64 /* max # of disk addrs for fixed allocations */ 41 #ifndef NPENDINGIO 42 #define NPENDINGIO 64 /* max # of pending cleans */ 43 #endif 44 45 #ifdef DEBUG 46 int swpagerdebug = 0x100; 47 #define SDB_FOLLOW 0x001 48 #define SDB_INIT 0x002 49 #define SDB_ALLOC 0x004 50 #define SDB_IO 0x008 51 #define SDB_WRITE 0x010 52 #define SDB_FAIL 0x020 53 #define SDB_ALLOCBLK 0x040 54 #define SDB_FULL 0x080 55 #define SDB_ANOM 0x100 56 #define SDB_ANOMPANIC 0x200 57 #define SDB_CLUSTER 0x400 58 #define SDB_PARANOIA 0x800 59 #endif 60 61 TAILQ_HEAD(swpclean, swpagerclean); 62 63 struct swpagerclean { 64 TAILQ_ENTRY(swpagerclean) spc_list; 65 int spc_flags; 66 struct buf *spc_bp; 67 sw_pager_t spc_swp; 68 vm_offset_t spc_kva; 69 vm_page_t spc_m; 70 int spc_npages; 71 } swcleanlist[NPENDINGIO]; 72 typedef struct swpagerclean *swp_clean_t; 73 74 /* spc_flags values */ 75 #define SPC_FREE 0x00 76 #define SPC_BUSY 0x01 77 #define SPC_DONE 0x02 78 #define SPC_ERROR 0x04 79 80 struct swtab { 81 vm_size_t st_osize; /* size of object (bytes) */ 82 int st_bsize; /* vs. size of swap block (DEV_BSIZE units) */ 83 #ifdef DEBUG 84 u_long st_inuse; /* number in this range in use */ 85 u_long st_usecnt; /* total used of this size */ 86 #endif 87 } swtab[NSWSIZES+1]; 88 89 #ifdef DEBUG 90 int swap_pager_poip; /* pageouts in progress */ 91 int swap_pager_piip; /* pageins in progress */ 92 #endif 93 94 int swap_pager_maxcluster; /* maximum cluster size */ 95 int swap_pager_npendingio; /* number of pager clean structs */ 96 97 struct swpclean swap_pager_inuse; /* list of pending page cleans */ 98 struct swpclean swap_pager_free; /* list of free pager clean structs */ 99 struct pagerlst swap_pager_list; /* list of "named" anon regions */ 100 101 static int swap_pager_finish __P((swp_clean_t)); 102 static void swap_pager_init __P((void)); 103 static vm_pager_t swap_pager_alloc 104 __P((caddr_t, vm_size_t, vm_prot_t, vm_offset_t)); 105 static void swap_pager_clean __P((int)); 106 #ifdef DEBUG 107 static void swap_pager_clean_check __P((vm_page_t *, int, int)); 108 #endif 109 static void swap_pager_cluster 110 __P((vm_pager_t, vm_offset_t, 111 vm_offset_t *, vm_offset_t *)); 112 static void swap_pager_dealloc __P((vm_pager_t)); 113 static int swap_pager_getpage 114 __P((vm_pager_t, vm_page_t *, int, boolean_t)); 115 static boolean_t swap_pager_haspage __P((vm_pager_t, vm_offset_t)); 116 static int swap_pager_io __P((sw_pager_t, vm_page_t *, int, int)); 117 static void swap_pager_iodone __P((struct buf *)); 118 static int swap_pager_putpage 119 __P((vm_pager_t, vm_page_t *, int, boolean_t)); 120 121 struct pagerops swappagerops = { 122 swap_pager_init, 123 swap_pager_alloc, 124 swap_pager_dealloc, 125 swap_pager_getpage, 126 swap_pager_putpage, 127 swap_pager_haspage, 128 swap_pager_cluster 129 }; 130 131 static void 132 swap_pager_init() 133 { 134 register swp_clean_t spc; 135 register int i, bsize; 136 extern int dmmin, dmmax; 137 int maxbsize; 138 139 #ifdef DEBUG 140 if (swpagerdebug & (SDB_FOLLOW|SDB_INIT)) 141 printf("swpg_init()\n"); 142 #endif 143 dfltpagerops = &swappagerops; 144 TAILQ_INIT(&swap_pager_list); 145 146 /* 147 * Allocate async IO structures. 148 * 149 * XXX it would be nice if we could do this dynamically based on 150 * the value of nswbuf (since we are ultimately limited by that) 151 * but neither nswbuf or malloc has been initialized yet. So the 152 * structs are statically allocated above. 153 */ 154 swap_pager_npendingio = NPENDINGIO; 155 156 /* 157 * Initialize clean lists 158 */ 159 TAILQ_INIT(&swap_pager_inuse); 160 TAILQ_INIT(&swap_pager_free); 161 for (i = 0, spc = swcleanlist; i < swap_pager_npendingio; i++, spc++) { 162 TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list); 163 spc->spc_flags = SPC_FREE; 164 } 165 166 /* 167 * Calculate the swap allocation constants. 168 */ 169 if (dmmin == 0) { 170 dmmin = DMMIN; 171 if (dmmin < CLBYTES/DEV_BSIZE) 172 dmmin = CLBYTES/DEV_BSIZE; 173 } 174 if (dmmax == 0) 175 dmmax = DMMAX; 176 177 /* 178 * Fill in our table of object size vs. allocation size 179 */ 180 bsize = btodb(PAGE_SIZE); 181 if (bsize < dmmin) 182 bsize = dmmin; 183 maxbsize = btodb(sizeof(sw_bm_t) * NBBY * PAGE_SIZE); 184 if (maxbsize > dmmax) 185 maxbsize = dmmax; 186 for (i = 0; i < NSWSIZES; i++) { 187 swtab[i].st_osize = (vm_size_t) (MAXDADDRS * dbtob(bsize)); 188 swtab[i].st_bsize = bsize; 189 if (bsize <= btodb(MAXPHYS)) 190 swap_pager_maxcluster = dbtob(bsize); 191 #ifdef DEBUG 192 if (swpagerdebug & SDB_INIT) 193 printf("swpg_init: ix %d, size %x, bsize %x\n", 194 i, swtab[i].st_osize, swtab[i].st_bsize); 195 #endif 196 if (bsize >= maxbsize) 197 break; 198 bsize *= 2; 199 } 200 swtab[i].st_osize = 0; 201 swtab[i].st_bsize = bsize; 202 } 203 204 /* 205 * Allocate a pager structure and associated resources. 206 * Note that if we are called from the pageout daemon (handle == NULL) 207 * we should not wait for memory as it could resulting in deadlock. 208 */ 209 static vm_pager_t 210 swap_pager_alloc(handle, size, prot, foff) 211 caddr_t handle; 212 register vm_size_t size; 213 vm_prot_t prot; 214 vm_offset_t foff; 215 { 216 register vm_pager_t pager; 217 register sw_pager_t swp; 218 struct swtab *swt; 219 int waitok; 220 221 #ifdef DEBUG 222 if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOC)) 223 printf("swpg_alloc(%x, %x, %x)\n", handle, size, prot); 224 #endif 225 /* 226 * If this is a "named" anonymous region, look it up and 227 * return the appropriate pager if it exists. 228 */ 229 if (handle) { 230 pager = vm_pager_lookup(&swap_pager_list, handle); 231 if (pager != NULL) { 232 /* 233 * Use vm_object_lookup to gain a reference 234 * to the object and also to remove from the 235 * object cache. 236 */ 237 if (vm_object_lookup(pager) == NULL) 238 panic("swap_pager_alloc: bad object"); 239 return(pager); 240 } 241 } 242 /* 243 * Pager doesn't exist, allocate swap management resources 244 * and initialize. 245 */ 246 waitok = handle ? M_WAITOK : M_NOWAIT; 247 pager = (vm_pager_t)malloc(sizeof *pager, M_VMPAGER, waitok); 248 if (pager == NULL) 249 return(NULL); 250 swp = (sw_pager_t)malloc(sizeof *swp, M_VMPGDATA, waitok); 251 if (swp == NULL) { 252 #ifdef DEBUG 253 if (swpagerdebug & SDB_FAIL) 254 printf("swpg_alloc: swpager malloc failed\n"); 255 #endif 256 free((caddr_t)pager, M_VMPAGER); 257 return(NULL); 258 } 259 size = round_page(size); 260 for (swt = swtab; swt->st_osize; swt++) 261 if (size <= swt->st_osize) 262 break; 263 #ifdef DEBUG 264 swt->st_inuse++; 265 swt->st_usecnt++; 266 #endif 267 swp->sw_osize = size; 268 swp->sw_bsize = swt->st_bsize; 269 swp->sw_nblocks = (btodb(size) + swp->sw_bsize - 1) / swp->sw_bsize; 270 swp->sw_blocks = (sw_blk_t) 271 malloc(swp->sw_nblocks*sizeof(*swp->sw_blocks), 272 M_VMPGDATA, M_NOWAIT); 273 if (swp->sw_blocks == NULL) { 274 free((caddr_t)swp, M_VMPGDATA); 275 free((caddr_t)pager, M_VMPAGER); 276 #ifdef DEBUG 277 if (swpagerdebug & SDB_FAIL) 278 printf("swpg_alloc: sw_blocks malloc failed\n"); 279 swt->st_inuse--; 280 swt->st_usecnt--; 281 #endif 282 return(FALSE); 283 } 284 bzero((caddr_t)swp->sw_blocks, 285 swp->sw_nblocks * sizeof(*swp->sw_blocks)); 286 swp->sw_poip = 0; 287 if (handle) { 288 vm_object_t object; 289 290 swp->sw_flags = SW_NAMED; 291 TAILQ_INSERT_TAIL(&swap_pager_list, pager, pg_list); 292 /* 293 * Consistant with other pagers: return with object 294 * referenced. Can't do this with handle == NULL 295 * since it might be the pageout daemon calling. 296 */ 297 object = vm_object_allocate(size); 298 vm_object_enter(object, pager); 299 vm_object_setpager(object, pager, 0, FALSE); 300 } else { 301 swp->sw_flags = 0; 302 pager->pg_list.tqe_next = NULL; 303 pager->pg_list.tqe_prev = NULL; 304 } 305 pager->pg_handle = handle; 306 pager->pg_ops = &swappagerops; 307 pager->pg_type = PG_SWAP; 308 pager->pg_flags = PG_CLUSTERPUT; 309 pager->pg_data = swp; 310 311 #ifdef DEBUG 312 if (swpagerdebug & SDB_ALLOC) 313 printf("swpg_alloc: pg_data %x, %x of %x at %x\n", 314 swp, swp->sw_nblocks, swp->sw_bsize, swp->sw_blocks); 315 #endif 316 return(pager); 317 } 318 319 static void 320 swap_pager_dealloc(pager) 321 vm_pager_t pager; 322 { 323 register int i; 324 register sw_blk_t bp; 325 register sw_pager_t swp; 326 struct swtab *swt; 327 int s; 328 329 #ifdef DEBUG 330 /* save panic time state */ 331 if ((swpagerdebug & SDB_ANOMPANIC) && panicstr) 332 return; 333 if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOC)) 334 printf("swpg_dealloc(%x)\n", pager); 335 #endif 336 /* 337 * Remove from list right away so lookups will fail if we 338 * block for pageout completion. 339 */ 340 swp = (sw_pager_t) pager->pg_data; 341 if (swp->sw_flags & SW_NAMED) { 342 TAILQ_REMOVE(&swap_pager_list, pager, pg_list); 343 swp->sw_flags &= ~SW_NAMED; 344 } 345 #ifdef DEBUG 346 for (swt = swtab; swt->st_osize; swt++) 347 if (swp->sw_osize <= swt->st_osize) 348 break; 349 swt->st_inuse--; 350 #endif 351 352 /* 353 * Wait for all pageouts to finish and remove 354 * all entries from cleaning list. 355 */ 356 s = splbio(); 357 while (swp->sw_poip) { 358 swp->sw_flags |= SW_WANTED; 359 (void) tsleep(swp, PVM, "swpgdealloc", 0); 360 } 361 splx(s); 362 swap_pager_clean(B_WRITE); 363 364 /* 365 * Free left over swap blocks 366 */ 367 for (i = 0, bp = swp->sw_blocks; i < swp->sw_nblocks; i++, bp++) 368 if (bp->swb_block) { 369 #ifdef DEBUG 370 if (swpagerdebug & (SDB_ALLOCBLK|SDB_FULL)) 371 printf("swpg_dealloc: blk %x\n", 372 bp->swb_block); 373 #endif 374 rmfree(swapmap, swp->sw_bsize, bp->swb_block); 375 } 376 /* 377 * Free swap management resources 378 */ 379 free((caddr_t)swp->sw_blocks, M_VMPGDATA); 380 free((caddr_t)swp, M_VMPGDATA); 381 free((caddr_t)pager, M_VMPAGER); 382 } 383 384 static int 385 swap_pager_getpage(pager, mlist, npages, sync) 386 vm_pager_t pager; 387 vm_page_t *mlist; 388 int npages; 389 boolean_t sync; 390 { 391 #ifdef DEBUG 392 if (swpagerdebug & SDB_FOLLOW) 393 printf("swpg_getpage(%x, %x, %x, %x)\n", 394 pager, mlist, npages, sync); 395 #endif 396 return(swap_pager_io((sw_pager_t)pager->pg_data, 397 mlist, npages, B_READ)); 398 } 399 400 static int 401 swap_pager_putpage(pager, mlist, npages, sync) 402 vm_pager_t pager; 403 vm_page_t *mlist; 404 int npages; 405 boolean_t sync; 406 { 407 int flags; 408 409 #ifdef DEBUG 410 if (swpagerdebug & SDB_FOLLOW) 411 printf("swpg_putpage(%x, %x, %x, %x)\n", 412 pager, mlist, npages, sync); 413 #endif 414 if (pager == NULL) { 415 swap_pager_clean(B_WRITE); 416 return (VM_PAGER_OK); /* ??? */ 417 } 418 flags = B_WRITE; 419 if (!sync) 420 flags |= B_ASYNC; 421 return(swap_pager_io((sw_pager_t)pager->pg_data, 422 mlist, npages, flags)); 423 } 424 425 static boolean_t 426 swap_pager_haspage(pager, offset) 427 vm_pager_t pager; 428 vm_offset_t offset; 429 { 430 register sw_pager_t swp; 431 register sw_blk_t swb; 432 int ix; 433 434 #ifdef DEBUG 435 if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOCBLK)) 436 printf("swpg_haspage(%x, %x) ", pager, offset); 437 #endif 438 swp = (sw_pager_t) pager->pg_data; 439 ix = offset / dbtob(swp->sw_bsize); 440 if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) { 441 #ifdef DEBUG 442 if (swpagerdebug & (SDB_FAIL|SDB_FOLLOW|SDB_ALLOCBLK)) 443 printf("swpg_haspage: %x bad offset %x, ix %x\n", 444 swp->sw_blocks, offset, ix); 445 #endif 446 return(FALSE); 447 } 448 swb = &swp->sw_blocks[ix]; 449 if (swb->swb_block) 450 ix = atop(offset % dbtob(swp->sw_bsize)); 451 #ifdef DEBUG 452 if (swpagerdebug & SDB_ALLOCBLK) 453 printf("%x blk %x+%x ", swp->sw_blocks, swb->swb_block, ix); 454 if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOCBLK)) 455 printf("-> %c\n", 456 "FT"[swb->swb_block && (swb->swb_mask & (1 << ix))]); 457 #endif 458 if (swb->swb_block && (swb->swb_mask & (1 << ix))) 459 return(TRUE); 460 return(FALSE); 461 } 462 463 static void 464 swap_pager_cluster(pager, offset, loffset, hoffset) 465 vm_pager_t pager; 466 vm_offset_t offset; 467 vm_offset_t *loffset; 468 vm_offset_t *hoffset; 469 { 470 sw_pager_t swp; 471 register int bsize; 472 vm_offset_t loff, hoff; 473 474 #ifdef DEBUG 475 if (swpagerdebug & (SDB_FOLLOW|SDB_CLUSTER)) 476 printf("swpg_cluster(%x, %x) ", pager, offset); 477 #endif 478 swp = (sw_pager_t) pager->pg_data; 479 bsize = dbtob(swp->sw_bsize); 480 if (bsize > swap_pager_maxcluster) 481 bsize = swap_pager_maxcluster; 482 483 loff = offset - (offset % bsize); 484 if (loff >= swp->sw_osize) 485 panic("swap_pager_cluster: bad offset"); 486 487 hoff = loff + bsize; 488 if (hoff > swp->sw_osize) 489 hoff = swp->sw_osize; 490 491 *loffset = loff; 492 *hoffset = hoff; 493 #ifdef DEBUG 494 if (swpagerdebug & (SDB_FOLLOW|SDB_CLUSTER)) 495 printf("returns [%x-%x]\n", loff, hoff); 496 #endif 497 } 498 499 /* 500 * Scaled down version of swap(). 501 * Assumes that PAGE_SIZE < MAXPHYS; i.e. only one operation needed. 502 * BOGUS: lower level IO routines expect a KVA so we have to map our 503 * provided physical page into the KVA to keep them happy. 504 */ 505 static int 506 swap_pager_io(swp, mlist, npages, flags) 507 register sw_pager_t swp; 508 vm_page_t *mlist; 509 int npages; 510 int flags; 511 { 512 register struct buf *bp; 513 register sw_blk_t swb; 514 register int s; 515 int ix, mask; 516 boolean_t rv; 517 vm_offset_t kva, off; 518 swp_clean_t spc; 519 vm_page_t m; 520 521 #ifdef DEBUG 522 /* save panic time state */ 523 if ((swpagerdebug & SDB_ANOMPANIC) && panicstr) 524 return (VM_PAGER_FAIL); /* XXX: correct return? */ 525 if (swpagerdebug & (SDB_FOLLOW|SDB_IO)) 526 printf("swpg_io(%x, %x, %x, %x)\n", swp, mlist, npages, flags); 527 if (flags & B_READ) { 528 if (flags & B_ASYNC) 529 panic("swap_pager_io: cannot do ASYNC reads"); 530 if (npages != 1) 531 panic("swap_pager_io: cannot do clustered reads"); 532 } 533 #endif 534 535 /* 536 * First determine if the page exists in the pager if this is 537 * a sync read. This quickly handles cases where we are 538 * following shadow chains looking for the top level object 539 * with the page. 540 */ 541 m = *mlist; 542 off = m->offset + m->object->paging_offset; 543 ix = off / dbtob(swp->sw_bsize); 544 if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) { 545 #ifdef DEBUG 546 if ((flags & B_READ) == 0 && (swpagerdebug & SDB_ANOM)) { 547 printf("swap_pager_io: no swap block on write\n"); 548 return(VM_PAGER_BAD); 549 } 550 #endif 551 return(VM_PAGER_FAIL); 552 } 553 swb = &swp->sw_blocks[ix]; 554 off = off % dbtob(swp->sw_bsize); 555 if ((flags & B_READ) && 556 (swb->swb_block == 0 || (swb->swb_mask & (1 << atop(off))) == 0)) 557 return(VM_PAGER_FAIL); 558 559 /* 560 * For reads (pageins) and synchronous writes, we clean up 561 * all completed async pageouts. 562 */ 563 if ((flags & B_ASYNC) == 0) { 564 s = splbio(); 565 swap_pager_clean(flags&B_READ); 566 #ifdef DEBUG 567 if (swpagerdebug & SDB_PARANOIA) 568 swap_pager_clean_check(mlist, npages, flags&B_READ); 569 #endif 570 splx(s); 571 } 572 /* 573 * For async writes (pageouts), we cleanup completed pageouts so 574 * that all available resources are freed. Also tells us if this 575 * page is already being cleaned. If it is, or no resources 576 * are available, we try again later. 577 */ 578 else { 579 swap_pager_clean(B_WRITE); 580 #ifdef DEBUG 581 if (swpagerdebug & SDB_PARANOIA) 582 swap_pager_clean_check(mlist, npages, B_WRITE); 583 #endif 584 if (swap_pager_free.tqh_first == NULL) { 585 #ifdef DEBUG 586 if (swpagerdebug & SDB_FAIL) 587 printf("%s: no available io headers\n", 588 "swap_pager_io"); 589 #endif 590 return(VM_PAGER_AGAIN); 591 } 592 } 593 594 /* 595 * Allocate a swap block if necessary. 596 */ 597 if (swb->swb_block == 0) { 598 swb->swb_block = rmalloc(swapmap, swp->sw_bsize); 599 if (swb->swb_block == 0) { 600 #ifdef DEBUG 601 if (swpagerdebug & SDB_FAIL) 602 printf("swpg_io: rmalloc of %x failed\n", 603 swp->sw_bsize); 604 #endif 605 /* 606 * XXX this is technically a resource shortage that 607 * should return AGAIN, but the situation isn't likely 608 * to be remedied just by delaying a little while and 609 * trying again (the pageout daemon's current response 610 * to AGAIN) so we just return FAIL. 611 */ 612 return(VM_PAGER_FAIL); 613 } 614 #ifdef DEBUG 615 if (swpagerdebug & (SDB_FULL|SDB_ALLOCBLK)) 616 printf("swpg_io: %x alloc blk %x at ix %x\n", 617 swp->sw_blocks, swb->swb_block, ix); 618 #endif 619 } 620 621 /* 622 * Allocate a kernel virtual address and initialize so that PTE 623 * is available for lower level IO drivers. 624 */ 625 kva = vm_pager_map_pages(mlist, npages, !(flags & B_ASYNC)); 626 if (kva == NULL) { 627 #ifdef DEBUG 628 if (swpagerdebug & SDB_FAIL) 629 printf("%s: no KVA space to map pages\n", 630 "swap_pager_io"); 631 #endif 632 return(VM_PAGER_AGAIN); 633 } 634 635 /* 636 * Get a swap buffer header and initialize it. 637 */ 638 s = splbio(); 639 while (bswlist.b_actf == NULL) { 640 #ifdef DEBUG 641 if (swpagerdebug & SDB_ANOM) 642 printf("swap_pager_io: wait on swbuf for %x (%d)\n", 643 m, flags); 644 #endif 645 bswlist.b_flags |= B_WANTED; 646 tsleep((caddr_t)&bswlist, PSWP+1, "swpgiobuf", 0); 647 } 648 bp = bswlist.b_actf; 649 bswlist.b_actf = bp->b_actf; 650 splx(s); 651 bp->b_flags = B_BUSY | (flags & B_READ); 652 bp->b_proc = &proc0; /* XXX (but without B_PHYS set this is ok) */ 653 bp->b_data = (caddr_t)kva; 654 bp->b_blkno = swb->swb_block + btodb(off); 655 VHOLD(swapdev_vp); 656 bp->b_vp = swapdev_vp; 657 if (swapdev_vp->v_type == VBLK) 658 bp->b_dev = swapdev_vp->v_rdev; 659 bp->b_bcount = npages * PAGE_SIZE; 660 661 /* 662 * For writes we set up additional buffer fields, record a pageout 663 * in progress and mark that these swap blocks are now allocated. 664 */ 665 if ((bp->b_flags & B_READ) == 0) { 666 bp->b_dirtyoff = 0; 667 bp->b_dirtyend = npages * PAGE_SIZE; 668 swapdev_vp->v_numoutput++; 669 s = splbio(); 670 swp->sw_poip++; 671 splx(s); 672 mask = (~(~0 << npages)) << atop(off); 673 #ifdef DEBUG 674 swap_pager_poip++; 675 if (swpagerdebug & SDB_WRITE) 676 printf("swpg_io: write: bp=%x swp=%x poip=%d\n", 677 bp, swp, swp->sw_poip); 678 if ((swpagerdebug & SDB_ALLOCBLK) && 679 (swb->swb_mask & mask) != mask) 680 printf("swpg_io: %x write %d pages at %x+%x\n", 681 swp->sw_blocks, npages, swb->swb_block, 682 atop(off)); 683 if (swpagerdebug & SDB_CLUSTER) 684 printf("swpg_io: off=%x, npg=%x, mask=%x, bmask=%x\n", 685 off, npages, mask, swb->swb_mask); 686 #endif 687 swb->swb_mask |= mask; 688 } 689 /* 690 * If this is an async write we set up still more buffer fields 691 * and place a "cleaning" entry on the inuse queue. 692 */ 693 if ((flags & (B_READ|B_ASYNC)) == B_ASYNC) { 694 #ifdef DEBUG 695 if (swap_pager_free.tqh_first == NULL) 696 panic("swpg_io: lost spc"); 697 #endif 698 spc = swap_pager_free.tqh_first; 699 TAILQ_REMOVE(&swap_pager_free, spc, spc_list); 700 #ifdef DEBUG 701 if (spc->spc_flags != SPC_FREE) 702 panic("swpg_io: bad free spc"); 703 #endif 704 spc->spc_flags = SPC_BUSY; 705 spc->spc_bp = bp; 706 spc->spc_swp = swp; 707 spc->spc_kva = kva; 708 /* 709 * Record the first page. This allows swap_pager_finish 710 * to efficiently handle the common case of a single page. 711 * For clusters, it allows us to locate the object easily 712 * and we then reconstruct the rest of the mlist from spc_kva. 713 */ 714 spc->spc_m = m; 715 spc->spc_npages = npages; 716 bp->b_flags |= B_CALL; 717 bp->b_iodone = swap_pager_iodone; 718 s = splbio(); 719 TAILQ_INSERT_TAIL(&swap_pager_inuse, spc, spc_list); 720 splx(s); 721 } 722 723 /* 724 * Finally, start the IO operation. 725 * If it is async we are all done, otherwise we must wait for 726 * completion and cleanup afterwards. 727 */ 728 #ifdef DEBUG 729 if (swpagerdebug & SDB_IO) 730 printf("swpg_io: IO start: bp %x, db %x, va %x, pa %x\n", 731 bp, swb->swb_block+btodb(off), kva, VM_PAGE_TO_PHYS(m)); 732 #endif 733 VOP_STRATEGY(bp); 734 if ((flags & (B_READ|B_ASYNC)) == B_ASYNC) { 735 #ifdef DEBUG 736 if (swpagerdebug & SDB_IO) 737 printf("swpg_io: IO started: bp %x\n", bp); 738 #endif 739 return(VM_PAGER_PEND); 740 } 741 s = splbio(); 742 #ifdef DEBUG 743 if (flags & B_READ) 744 swap_pager_piip++; 745 else 746 swap_pager_poip++; 747 #endif 748 while ((bp->b_flags & B_DONE) == 0) 749 (void) tsleep(bp, PVM, "swpgio", 0); 750 if ((flags & B_READ) == 0) 751 --swp->sw_poip; 752 #ifdef DEBUG 753 if (flags & B_READ) 754 --swap_pager_piip; 755 else 756 --swap_pager_poip; 757 #endif 758 rv = (bp->b_flags & B_ERROR) ? VM_PAGER_ERROR : VM_PAGER_OK; 759 bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY); 760 bp->b_actf = bswlist.b_actf; 761 bswlist.b_actf = bp; 762 if (bp->b_vp) 763 brelvp(bp); 764 if (bswlist.b_flags & B_WANTED) { 765 bswlist.b_flags &= ~B_WANTED; 766 wakeup(&bswlist); 767 } 768 if ((flags & B_READ) == 0 && rv == VM_PAGER_OK) { 769 m->flags |= PG_CLEAN; 770 pmap_clear_modify(VM_PAGE_TO_PHYS(m)); 771 } 772 splx(s); 773 #ifdef DEBUG 774 if (swpagerdebug & SDB_IO) 775 printf("swpg_io: IO done: bp %x, rv %d\n", bp, rv); 776 if ((swpagerdebug & SDB_FAIL) && rv == VM_PAGER_ERROR) 777 printf("swpg_io: IO error\n"); 778 #endif 779 vm_pager_unmap_pages(kva, npages); 780 return(rv); 781 } 782 783 static void 784 swap_pager_clean(rw) 785 int rw; 786 { 787 register swp_clean_t spc; 788 register int s, i; 789 vm_object_t object; 790 vm_page_t m; 791 792 #ifdef DEBUG 793 /* save panic time state */ 794 if ((swpagerdebug & SDB_ANOMPANIC) && panicstr) 795 return; 796 if (swpagerdebug & SDB_FOLLOW) 797 printf("swpg_clean(%x)\n", rw); 798 #endif 799 800 for (;;) { 801 /* 802 * Look up and removal from inuse list must be done 803 * at splbio() to avoid conflicts with swap_pager_iodone. 804 */ 805 s = splbio(); 806 for (spc = swap_pager_inuse.tqh_first; 807 spc != NULL; 808 spc = spc->spc_list.tqe_next) { 809 /* 810 * If the operation is done, remove it from the 811 * list and process it. 812 * 813 * XXX if we can't get the object lock we also 814 * leave it on the list and try again later. 815 * Is there something better we could do? 816 */ 817 if ((spc->spc_flags & SPC_DONE) && 818 vm_object_lock_try(spc->spc_m->object)) { 819 TAILQ_REMOVE(&swap_pager_inuse, spc, spc_list); 820 break; 821 } 822 } 823 824 /* 825 * No operations done, thats all we can do for now. 826 */ 827 if (spc == NULL) { 828 splx(s); 829 break; 830 } 831 splx(s); 832 833 /* 834 * Found a completed operation so finish it off. 835 * Note: no longer at splbio since entry is off the list. 836 */ 837 m = spc->spc_m; 838 object = m->object; 839 840 /* 841 * Process each page in the cluster. 842 * The first page is explicitly kept in the cleaning 843 * entry, others must be reconstructed from the KVA. 844 */ 845 for (i = 0; i < spc->spc_npages; i++) { 846 if (i) 847 m = vm_pager_atop(spc->spc_kva + ptoa(i)); 848 /* 849 * If no error mark as clean and inform the pmap 850 * system. If there was an error, mark as dirty 851 * so we will try again. 852 * 853 * XXX could get stuck doing this, should give up 854 * after awhile. 855 */ 856 if (spc->spc_flags & SPC_ERROR) { 857 printf("%s: clean of page %x failed\n", 858 "swap_pager_clean", 859 VM_PAGE_TO_PHYS(m)); 860 m->flags |= PG_LAUNDRY; 861 } else { 862 m->flags |= PG_CLEAN; 863 pmap_clear_modify(VM_PAGE_TO_PHYS(m)); 864 } 865 m->flags &= ~PG_BUSY; 866 PAGE_WAKEUP(m); 867 } 868 869 /* 870 * Done with the object, decrement the paging count 871 * and unlock it. 872 */ 873 if (--object->paging_in_progress == 0) 874 wakeup(object); 875 vm_object_unlock(object); 876 877 /* 878 * Free up KVM used and put the entry back on the list. 879 */ 880 vm_pager_unmap_pages(spc->spc_kva, spc->spc_npages); 881 spc->spc_flags = SPC_FREE; 882 TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list); 883 #ifdef DEBUG 884 if (swpagerdebug & SDB_WRITE) 885 printf("swpg_clean: free spc %x\n", spc); 886 #endif 887 } 888 } 889 890 #ifdef DEBUG 891 static void 892 swap_pager_clean_check(mlist, npages, rw) 893 vm_page_t *mlist; 894 int npages; 895 int rw; 896 { 897 register swp_clean_t spc; 898 boolean_t bad; 899 int i, j, s; 900 vm_page_t m; 901 902 if (panicstr) 903 return; 904 905 bad = FALSE; 906 s = splbio(); 907 for (spc = swap_pager_inuse.tqh_first; 908 spc != NULL; 909 spc = spc->spc_list.tqe_next) { 910 for (j = 0; j < spc->spc_npages; j++) { 911 m = vm_pager_atop(spc->spc_kva + ptoa(j)); 912 for (i = 0; i < npages; i++) 913 if (m == mlist[i]) { 914 if (swpagerdebug & SDB_ANOM) 915 printf( 916 "swpg_clean_check: %s: page %x on list, flags %x\n", 917 rw == B_WRITE ? "write" : "read", mlist[i], spc->spc_flags); 918 bad = TRUE; 919 } 920 } 921 } 922 splx(s); 923 if (bad) 924 panic("swpg_clean_check"); 925 } 926 #endif 927 928 static void 929 swap_pager_iodone(bp) 930 register struct buf *bp; 931 { 932 register swp_clean_t spc; 933 daddr_t blk; 934 int s; 935 936 #ifdef DEBUG 937 /* save panic time state */ 938 if ((swpagerdebug & SDB_ANOMPANIC) && panicstr) 939 return; 940 if (swpagerdebug & SDB_FOLLOW) 941 printf("swpg_iodone(%x)\n", bp); 942 #endif 943 s = splbio(); 944 for (spc = swap_pager_inuse.tqh_first; 945 spc != NULL; 946 spc = spc->spc_list.tqe_next) 947 if (spc->spc_bp == bp) 948 break; 949 #ifdef DEBUG 950 if (spc == NULL) 951 panic("swap_pager_iodone: bp not found"); 952 #endif 953 954 spc->spc_flags &= ~SPC_BUSY; 955 spc->spc_flags |= SPC_DONE; 956 if (bp->b_flags & B_ERROR) 957 spc->spc_flags |= SPC_ERROR; 958 spc->spc_bp = NULL; 959 blk = bp->b_blkno; 960 961 #ifdef DEBUG 962 --swap_pager_poip; 963 if (swpagerdebug & SDB_WRITE) 964 printf("swpg_iodone: bp=%x swp=%x flags=%x spc=%x poip=%x\n", 965 bp, spc->spc_swp, spc->spc_swp->sw_flags, 966 spc, spc->spc_swp->sw_poip); 967 #endif 968 969 spc->spc_swp->sw_poip--; 970 if (spc->spc_swp->sw_flags & SW_WANTED) { 971 spc->spc_swp->sw_flags &= ~SW_WANTED; 972 wakeup(spc->spc_swp); 973 } 974 975 bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY); 976 bp->b_actf = bswlist.b_actf; 977 bswlist.b_actf = bp; 978 if (bp->b_vp) 979 brelvp(bp); 980 if (bswlist.b_flags & B_WANTED) { 981 bswlist.b_flags &= ~B_WANTED; 982 wakeup(&bswlist); 983 } 984 wakeup(&vm_pages_needed); 985 splx(s); 986 } 987