/*	$OpenBSD: uvm_swap.c,v 1.87 2009/03/23 22:07:41 oga Exp $	*/
/*	$NetBSD: uvm_swap.c,v 1.40 2000/11/17 11:39:39 mrg Exp $	*/

/*
 * Copyright (c) 1995, 1996, 1997 Matthew R. Green
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: NetBSD: vm_swap.c,v 1.52 1997/12/02 13:47:37 pk Exp
 * from: Id: uvm_swap.c,v 1.1.2.42 1998/02/02 20:38:06 chuck Exp
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/proc.h>
#include <sys/namei.h>
#include <sys/disklabel.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/extent.h>
#include <sys/mount.h>
#include <sys/pool.h>
#include <sys/syscallargs.h>
#include <sys/swap.h>

#include <uvm/uvm.h>
#ifdef UVM_SWAP_ENCRYPT
#include <dev/rndvar.h>
#include <sys/syslog.h>
#endif

#include <miscfs/specfs/specdev.h>

/*
 * uvm_swap.c: manage configuration and i/o to swap space.
 */

/*
 * swap space is managed in the following way:
 *
 * each swap partition or file is described by a "swapdev" structure.
 * each "swapdev" structure contains a "swapent" structure which contains
 * information that is passed up to the user (via system calls).
 *
 * each swap partition is assigned a "priority" (int) which controls
 * swap partition usage.
 *
 * the system maintains a global data structure describing all swap
 * partitions/files.  there is a sorted LIST of "swappri" structures
 * which describe "swapdev"'s at that priority.  this LIST is headed
 * by the "swap_priority" global var.  each "swappri" contains a
 * CIRCLEQ of "swapdev" structures at that priority.
 *
 * locking:
 *  - swap_syscall_lock (sleep lock): this lock serializes the swapctl
 *    system call and prevents the swap priority list from changing
 *    while we are in the middle of a system call (e.g. SWAP_STATS).
 *  - uvm.swap_data_lock (simple_lock): this lock protects all swap data
 *    structures including the priority list, the swapdev structures,
 *    and the swapmap extent.
 *
 * each swap device has the following info:
 *  - swap device in use (could be disabled, preventing future use)
 *  - swap enabled (allows new allocations on swap)
 *  - map info in /dev/drum
 *  - vnode pointer
 * for swap files only:
 *  - block size
 *  - max byte count in buffer
 *  - buffer
 *  - credentials to use when doing i/o to file
 *
 * userland controls and configures swap with the swapctl(2) system call.
 * sys_swapctl() performs the following operations:
 *  [1] SWAP_NSWAP: returns the number of swap devices currently configured
 *  [2] SWAP_STATS: given a pointer to an array of swapent structures
 *	(passed in via "arg") of a size passed in via "misc" ... we load
 *	the current swap config into the array.
 *  [3] SWAP_ON: given a pathname in arg (could be device or file) and a
 *	priority in "misc", start swapping on it.
 *  [4] SWAP_OFF: as SWAP_ON, but stops swapping to a device
 *  [5] SWAP_CTL: changes the priority of a swap device (new priority in
 *	"misc")
 */
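
/*
 * To make the operations above concrete, a minimal userland sketch
 * (illustrative only, not part of this file; assumes the usual
 * userland prototype, int swapctl(int cmd, const void *arg, int misc),
 * and a hypothetical device path):
 *
 *	int n = swapctl(SWAP_NSWAP, NULL, 0);
 *	struct swapent *se = calloc(n, sizeof(*se));
 *	n = swapctl(SWAP_STATS, se, n);	     -- fills se[0..n-1]
 *	swapctl(SWAP_ON, "/dev/sd0b", 10);   -- enable at priority 10
 */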

/*
 * swapdev: describes a single swap partition/file
 *
 * note the following should be true:
 * swd_inuse <= swd_nblks  [number of blocks in use is <= total blocks]
 * swd_nblks <= swd_mapsize [because mapsize includes miniroot+disklabel]
 */
struct swapdev {
	struct swapent	swd_se;
#define	swd_dev		swd_se.se_dev		/* device id */
#define	swd_flags	swd_se.se_flags		/* flags:inuse/enable/fake */
#define	swd_priority	swd_se.se_priority	/* our priority */
#define	swd_inuse	swd_se.se_inuse		/* blocks in use */
#define	swd_nblks	swd_se.se_nblks		/* total blocks */
	char			*swd_path;	/* saved pathname of device */
	int			swd_pathlen;	/* length of pathname */
	int			swd_npages;	/* #pages we can use */
	int			swd_npginuse;	/* #pages in use */
	int			swd_npgbad;	/* #pages bad */
	int			swd_drumoffset;	/* page0 offset in drum */
	int			swd_drumsize;	/* #pages in drum */
	struct extent		*swd_ex;	/* extent for this swapdev */
	char			swd_exname[12];	/* name of extent above */
	struct vnode		*swd_vp;	/* backing vnode */
	CIRCLEQ_ENTRY(swapdev)	swd_next;	/* priority circleq */

	int			swd_bsize;	/* blocksize (bytes) */
	int			swd_maxactive;	/* max active i/o reqs */
	struct buf		swd_tab;	/* buffer list */
	struct ucred		*swd_cred;	/* cred for file access */
#ifdef UVM_SWAP_ENCRYPT
#define SWD_KEY_SHIFT		7		/* One key per 0.5 MByte */
#define SWD_KEY(x,y)		&((x)->swd_keys[((y) - (x)->swd_drumoffset) >> SWD_KEY_SHIFT])
#define SWD_KEY_SIZE(x)		(((x) + (1 << SWD_KEY_SHIFT) - 1) >> SWD_KEY_SHIFT)

#define SWD_DCRYPT_SHIFT	5
#define SWD_DCRYPT_BITS		32
#define SWD_DCRYPT_MASK		(SWD_DCRYPT_BITS - 1)
#define SWD_DCRYPT_OFF(x)	((x) >> SWD_DCRYPT_SHIFT)
#define SWD_DCRYPT_BIT(x)	((x) & SWD_DCRYPT_MASK)
#define SWD_DCRYPT_SIZE(x)	(SWD_DCRYPT_OFF((x) + SWD_DCRYPT_MASK) * sizeof(u_int32_t))
	u_int32_t		*swd_decrypt;	/* bitmap for decryption */
	struct swap_key		*swd_keys;	/* keys for different parts */
#endif
};
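
/*
 * Worked example for the encryption macros above (assuming 4KB pages,
 * i.e. PAGE_SHIFT == 12, so 1 << SWD_KEY_SHIFT == 128 pages == 0.5 MByte):
 *
 *	SWD_KEY_SIZE(npages)	= howmany(npages, 128) keys
 *	SWD_DCRYPT_SIZE(npages)	= one bit per page, rounded up to
 *				  whole 32-bit words
 *
 * e.g. a 1000-page device needs (1000 + 127) >> 7 = 8 keys and
 * (1000 + 31) >> 5 = 32 words (128 bytes) of decrypt bitmap.
 */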

/*
 * swap device priority entry; the list is kept sorted on `spi_priority'.
 */
struct swappri {
	int			spi_priority;     /* priority */
	CIRCLEQ_HEAD(spi_swapdev, swapdev)	spi_swapdev;
	/* circleq of swapdevs at this priority */
	LIST_ENTRY(swappri)	spi_swappri;      /* global list of pri's */
};

/*
 * The following two structures are used to keep track of data transfers
 * on swap devices associated with regular files.
 * NOTE: this code is more or less a copy of vnd.c; we use the same
 * structure names here to ease porting.
 */
struct vndxfer {
	struct buf	*vx_bp;		/* Pointer to parent buffer */
	struct swapdev	*vx_sdp;
	int		vx_error;
	int		vx_pending;	/* # of pending aux buffers */
	int		vx_flags;
#define VX_BUSY		1
#define VX_DEAD		2
};

struct vndbuf {
	struct buf	vb_buf;
	struct vndxfer	*vb_xfer;
};


/*
 * We keep a pool of vndbuf and vndxfer structures.
 */
struct pool vndxfer_pool;
struct pool vndbuf_pool;

#define	getvndxfer(vnx)	do {						\
	int s = splbio();						\
	vnx = pool_get(&vndxfer_pool, PR_WAITOK);			\
	splx(s);							\
} while (0)

#define putvndxfer(vnx) {						\
	pool_put(&vndxfer_pool, (void *)(vnx));				\
}

#define	getvndbuf(vbp)	do {						\
	int s = splbio();						\
	vbp = pool_get(&vndbuf_pool, PR_WAITOK);			\
	splx(s);							\
} while (0)

#define putvndbuf(vbp) {						\
	pool_put(&vndbuf_pool, (void *)(vbp));				\
}

/*
 * local variables
 */
static struct extent *swapmap;		/* controls the mapping of /dev/drum */

/* list of all active swap devices [by priority] */
LIST_HEAD(swap_priority, swappri);
static struct swap_priority swap_priority;

/* locks */
struct rwlock swap_syscall_lock = RWLOCK_INITIALIZER("swplk");

/*
 * prototypes
 */
static void		 swapdrum_add(struct swapdev *, int);
static struct swapdev	*swapdrum_getsdp(int);

static struct swapdev	*swaplist_find(struct vnode *, int);
static void		 swaplist_insert(struct swapdev *,
			     struct swappri *, int);
static void		 swaplist_trim(void);

static int swap_on(struct proc *, struct swapdev *);
static int swap_off(struct proc *, struct swapdev *);

static void sw_reg_strategy(struct swapdev *, struct buf *, int);
static void sw_reg_iodone(struct buf *);
static void sw_reg_start(struct swapdev *);

static int uvm_swap_io(struct vm_page **, int, int, int);

static void swapmount(void);

#ifdef UVM_SWAP_ENCRYPT
/* for swap encrypt */
boolean_t uvm_swap_allocpages(struct vm_page **, int);
void uvm_swap_markdecrypt(struct swapdev *, int, int, int);
boolean_t uvm_swap_needdecrypt(struct swapdev *, int);
void uvm_swap_initcrypt(struct swapdev *, int);
#endif

/*
 * uvm_swap_init: init the swap system data structures and locks
 *
 * => called at boot time from init_main.c after the filesystems
 *	are brought up (which happens after uvm_init())
 */
void
uvm_swap_init(void)
{
	UVMHIST_FUNC("uvm_swap_init");

	UVMHIST_CALLED(pdhist);
	/*
	 * first, init the swap list, its counter, and its lock.
	 * then get a handle on the vnode for /dev/drum by using
	 * its dev_t number ("swapdev", from MD conf.c).
	 */

	LIST_INIT(&swap_priority);
	uvmexp.nswapdev = 0;
	simple_lock_init(&uvm.swap_data_lock);

	if (!swapdev_vp && bdevvp(swapdev, &swapdev_vp))
		panic("uvm_swap_init: can't get vnode for swap device");

	/*
	 * create swap block resource map to map /dev/drum.  the range
	 * from 1 to INT_MAX allows 2 gigablocks of swap space.  note
	 * that block 0 is reserved (used to indicate an allocation
	 * failure, or no allocation).
	 */
	swapmap = extent_create("swapmap", 1, INT_MAX,
	    M_VMSWAP, 0, 0, EX_NOWAIT);
	if (swapmap == 0)
		panic("uvm_swap_init: extent_create failed");

	/*
	 * allocate pools for structures used for swapping to files.
	 */

	pool_init(&vndxfer_pool, sizeof(struct vndxfer), 0, 0, 0, "swp vnx",
	    NULL);

	pool_init(&vndbuf_pool, sizeof(struct vndbuf), 0, 0, 0, "swp vnd",
	    NULL);

	/*
	 * Setup the initial swap partition
	 */
	swapmount();

	/*
	 * done!
	 */
	UVMHIST_LOG(pdhist, "<- done", 0, 0, 0, 0);
}

#ifdef UVM_SWAP_ENCRYPT
void
uvm_swap_initcrypt_all(void)
{
	struct swapdev *sdp;
	struct swappri *spp;
	int npages;

	simple_lock(&uvm.swap_data_lock);

	LIST_FOREACH(spp, &swap_priority, spi_swappri) {
		CIRCLEQ_FOREACH(sdp, &spp->spi_swapdev, swd_next)
			if (sdp->swd_decrypt == NULL) {
				npages = dbtob((uint64_t)sdp->swd_nblks) >>
				    PAGE_SHIFT;
				uvm_swap_initcrypt(sdp, npages);
			}
	}
	simple_unlock(&uvm.swap_data_lock);
}

void
uvm_swap_initcrypt(struct swapdev *sdp, int npages)
{
	/*
	 * keep track of whether a page needs to be decrypted when we get
	 * it from the swap device.
	 * We cannot chance a malloc later; if we are doing ASYNC puts,
	 * we may not call malloc with M_WAITOK.  This consumes only
	 * 8KB memory for a 256MB swap partition.
	 */
	sdp->swd_decrypt = malloc(SWD_DCRYPT_SIZE(npages), M_VMSWAP,
	    M_WAITOK|M_ZERO);
	sdp->swd_keys = malloc(SWD_KEY_SIZE(npages) * sizeof(struct swap_key),
	    M_VMSWAP, M_WAITOK|M_ZERO);
}
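
/*
 * Sanity check for the "8KB for 256MB" claim above (assuming 4KB
 * pages): a 256MB partition holds 65536 pages, so the decrypt bitmap
 * is 65536 bits = 8192 bytes, and SWD_KEY_SIZE(65536) = 512 keys.
 */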

boolean_t
uvm_swap_allocpages(struct vm_page **pps, int npages)
{
	int i;
	boolean_t fail;

	/* Estimate if we will succeed */
	uvm_lock_fpageq();

	fail = uvmexp.free - npages < uvmexp.reserve_kernel;

	uvm_unlock_fpageq();

	if (fail)
		return FALSE;

	/* Get new pages */
	for (i = 0; i < npages; i++) {
		pps[i] = uvm_pagealloc(NULL, 0, NULL, 0);
		if (pps[i] == NULL)
			break;
	}

	/* On failure free and return */
	if (i < npages) {
		uvm_swap_freepages(pps, i);
		return FALSE;
	}

	return TRUE;
}

void
uvm_swap_freepages(struct vm_page **pps, int npages)
{
	int i;

	uvm_lock_pageq();
	for (i = 0; i < npages; i++)
		uvm_pagefree(pps[i]);
	uvm_unlock_pageq();
}

/*
 * Mark pages on the swap device for later decryption
 */

void
uvm_swap_markdecrypt(struct swapdev *sdp, int startslot, int npages,
    int decrypt)
{
	int pagestart, i;
	int off, bit;

	if (!sdp)
		return;

	pagestart = startslot - sdp->swd_drumoffset;
	for (i = 0; i < npages; i++, pagestart++) {
		off = SWD_DCRYPT_OFF(pagestart);
		bit = SWD_DCRYPT_BIT(pagestart);
		if (decrypt)
			/* pages read need decryption */
			sdp->swd_decrypt[off] |= 1 << bit;
		else
			/* pages read do not need decryption */
			sdp->swd_decrypt[off] &= ~(1 << bit);
	}
}

/*
 * Check if the page that we got from disk needs to be decrypted
 */

boolean_t
uvm_swap_needdecrypt(struct swapdev *sdp, int off)
{
	if (!sdp)
		return FALSE;

	off -= sdp->swd_drumoffset;
	return sdp->swd_decrypt[SWD_DCRYPT_OFF(off)] & (1 << SWD_DCRYPT_BIT(off)) ?
	    TRUE : FALSE;
}

void
uvm_swap_finicrypt_all(void)
{
	struct swapdev *sdp;
	struct swappri *spp;
	struct swap_key *key;
	unsigned int nkeys;

	simple_lock(&uvm.swap_data_lock);

	LIST_FOREACH(spp, &swap_priority, spi_swappri) {
		CIRCLEQ_FOREACH(sdp, &spp->spi_swapdev, swd_next) {
			if (sdp->swd_decrypt == NULL)
				continue;

			nkeys = dbtob((uint64_t)sdp->swd_nblks) >> PAGE_SHIFT;
			key = sdp->swd_keys + (SWD_KEY_SIZE(nkeys) - 1);
			do {
				if (key->refcount != 0)
					swap_key_delete(key);
			} while (key-- != sdp->swd_keys);
		}
	}
	simple_unlock(&uvm.swap_data_lock);
}
#endif /* UVM_SWAP_ENCRYPT */
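
/*
 * The two bitmap helpers above pair up around i/o; a device can hold a
 * mix of encrypted and plaintext pages (e.g. after uvm_doswapencrypt is
 * toggled), so the bit must be consulted on every read.  A hedged
 * sketch of the expected flow (slot numbers are examples only):
 *
 *	uvm_swap_markdecrypt(sdp, slot, npages, 1);   -- encrypted write
 *	...
 *	if (uvm_swap_needdecrypt(sdp, slot))	      -- later read
 *		swap_decrypt(...);
 */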

/*
 * swaplist functions: functions that operate on the list of swap
 * devices on the system.
 */

/*
 * swaplist_insert: insert swap device "sdp" into the global list
 *
 * => caller must hold both swap_syscall_lock and uvm.swap_data_lock
 * => caller must provide a newly malloc'd swappri structure (we will
 *	FREE it if we don't need it... this is to prevent malloc blocking
 *	here while adding swap)
 */
static void
swaplist_insert(struct swapdev *sdp, struct swappri *newspp, int priority)
{
	struct swappri *spp, *pspp;
	UVMHIST_FUNC("swaplist_insert"); UVMHIST_CALLED(pdhist);

	/*
	 * find entry at or after which to insert the new device.
	 */
	for (pspp = NULL, spp = LIST_FIRST(&swap_priority); spp != NULL;
	     spp = LIST_NEXT(spp, spi_swappri)) {
		if (priority <= spp->spi_priority)
			break;
		pspp = spp;
	}

	/*
	 * new priority?
	 */
	if (spp == NULL || spp->spi_priority != priority) {
		spp = newspp;	/* use newspp! */
		UVMHIST_LOG(pdhist, "created new swappri = %ld",
		    priority, 0, 0, 0);

		spp->spi_priority = priority;
		CIRCLEQ_INIT(&spp->spi_swapdev);

		if (pspp)
			LIST_INSERT_AFTER(pspp, spp, spi_swappri);
		else
			LIST_INSERT_HEAD(&swap_priority, spp, spi_swappri);
	} else {
		/* we don't need a new priority structure, free it */
		free(newspp, M_VMSWAP);
	}

	/*
	 * priority found (or created).  now insert on the priority's
	 * circleq list and bump the total number of swapdevs.
	 */
	sdp->swd_priority = priority;
	CIRCLEQ_INSERT_TAIL(&spp->spi_swapdev, sdp, swd_next);
	uvmexp.nswapdev++;
}

/*
 * swaplist_find: find and optionally remove a swap device from the
 * global list.
 *
 * => caller must hold both swap_syscall_lock and uvm.swap_data_lock
 * => we return the swapdev we found (and removed)
 */
static struct swapdev *
swaplist_find(struct vnode *vp, boolean_t remove)
{
	struct swapdev *sdp;
	struct swappri *spp;

	/*
	 * search the lists for the requested vp
	 */
	for (spp = LIST_FIRST(&swap_priority); spp != NULL;
	     spp = LIST_NEXT(spp, spi_swappri)) {
		for (sdp = CIRCLEQ_FIRST(&spp->spi_swapdev);
		     sdp != (void *)&spp->spi_swapdev;
		     sdp = CIRCLEQ_NEXT(sdp, swd_next))
			if (sdp->swd_vp == vp) {
				if (remove) {
					CIRCLEQ_REMOVE(&spp->spi_swapdev,
					    sdp, swd_next);
					uvmexp.nswapdev--;
				}
				return (sdp);
			}
	}
	return (NULL);
}


/*
 * swaplist_trim: scan priority list for empty priority entries and kill
 *	them.
 *
 * => caller must hold both swap_syscall_lock and uvm.swap_data_lock
 */
static void
swaplist_trim(void)
{
	struct swappri *spp, *nextspp;

	for (spp = LIST_FIRST(&swap_priority); spp != NULL; spp = nextspp) {
		nextspp = LIST_NEXT(spp, spi_swappri);
		if (CIRCLEQ_FIRST(&spp->spi_swapdev) !=
		    (void *)&spp->spi_swapdev)
			continue;
		LIST_REMOVE(spp, spi_swappri);
		free(spp, M_VMSWAP);
	}
}

/*
 * swapdrum_add: add a "swapdev"'s blocks into /dev/drum's area.
 *
 * => caller must hold swap_syscall_lock
 * => uvm.swap_data_lock should be unlocked (we may sleep)
 */
static void
swapdrum_add(struct swapdev *sdp, int npages)
{
	u_long result;

	if (extent_alloc(swapmap, npages, EX_NOALIGN, 0, EX_NOBOUNDARY,
	    EX_WAITOK, &result))
		panic("swapdrum_add");

	sdp->swd_drumoffset = result;
	sdp->swd_drumsize = npages;
}

/*
 * swapdrum_getsdp: given a page offset in /dev/drum, convert it back
 *	to the "swapdev" that maps that section of the drum.
 *
 * => each swapdev takes one big contig chunk of the drum
 * => caller must hold uvm.swap_data_lock
 */
static struct swapdev *
swapdrum_getsdp(int pgno)
{
	struct swapdev *sdp;
	struct swappri *spp;

	for (spp = LIST_FIRST(&swap_priority); spp != NULL;
	     spp = LIST_NEXT(spp, spi_swappri))
		for (sdp = CIRCLEQ_FIRST(&spp->spi_swapdev);
		     sdp != (void *)&spp->spi_swapdev;
		     sdp = CIRCLEQ_NEXT(sdp, swd_next))
			if (pgno >= sdp->swd_drumoffset &&
			    pgno < (sdp->swd_drumoffset + sdp->swd_drumsize)) {
				return sdp;
			}
	return NULL;
}

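/*
 * Drum layout example (illustrative numbers): if device A was added
 * first with 100 pages and device B with 200, swapdrum_add() typically
 * yields A = pages [1,100] and B = pages [101,300] (drum page 0 is
 * never allocated).  swapdrum_getsdp(150) then returns B, since
 * 101 <= 150 < 301.
 */
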

/*
 * sys_swapctl: main entry point for swapctl(2) system call
 *	[with two helper functions: swap_on and swap_off]
 */
int
sys_swapctl(struct proc *p, void *v, register_t *retval)
{
	struct sys_swapctl_args /* {
		syscallarg(int) cmd;
		syscallarg(void *) arg;
		syscallarg(int) misc;
	} */ *uap = (struct sys_swapctl_args *)v;
	struct vnode *vp;
	struct nameidata nd;
	struct swappri *spp;
	struct swapdev *sdp;
	struct swapent *sep;
	char	userpath[MAXPATHLEN];
	size_t	len;
	int	count, error, misc;
	int	priority;
	UVMHIST_FUNC("sys_swapctl"); UVMHIST_CALLED(pdhist);

	misc = SCARG(uap, misc);

	/*
	 * ensure serialized syscall access by grabbing the swap_syscall_lock
	 */
	rw_enter_write(&swap_syscall_lock);

	/*
	 * we handle the non-priv NSWAP and STATS requests first.
	 *
	 * SWAP_NSWAP: return number of config'd swap devices
	 * [can also be obtained with uvmexp sysctl]
	 */
	if (SCARG(uap, cmd) == SWAP_NSWAP) {
		UVMHIST_LOG(pdhist, "<- done SWAP_NSWAP=%ld", uvmexp.nswapdev,
		    0, 0, 0);
		*retval = uvmexp.nswapdev;
		error = 0;
		goto out;
	}

	/*
	 * SWAP_STATS: get stats on current # of configured swap devs
	 *
	 * note that the swap_priority list can't change as long
	 * as we are holding the swap_syscall_lock.  we don't want
	 * to grab the uvm.swap_data_lock because we may fault&sleep during
	 * copyout() and we don't want to be holding that lock then!
	 */
	if (SCARG(uap, cmd) == SWAP_STATS) {
		sep = (struct swapent *)SCARG(uap, arg);
		count = 0;

		for (spp = LIST_FIRST(&swap_priority); spp != NULL;
		     spp = LIST_NEXT(spp, spi_swappri)) {
			for (sdp = CIRCLEQ_FIRST(&spp->spi_swapdev);
			     sdp != (void *)&spp->spi_swapdev && misc-- > 0;
			     sdp = CIRCLEQ_NEXT(sdp, swd_next)) {
				sdp->swd_inuse =
				    btodb((u_int64_t)sdp->swd_npginuse <<
				    PAGE_SHIFT);
				error = copyout(&sdp->swd_se, sep,
				    sizeof(struct swapent));

				/* now copy out the path if necessary */
				if (error == 0)
					error = copyout(sdp->swd_path,
					    &sep->se_path, sdp->swd_pathlen);

				if (error)
					goto out;
				count++;
				sep++;
			}
		}

		UVMHIST_LOG(pdhist, "<- done SWAP_STATS", 0, 0, 0, 0);

		*retval = count;
		error = 0;
		goto out;
	}

	/*
	 * all other requests require superuser privs.  verify.
	 */
	if ((error = suser(p, 0)))
		goto out;

	/*
	 * at this point we expect a path name in arg.  we will
	 * use namei() to gain a vnode reference (vref), and lock
	 * the vnode (VOP_LOCK).
	 *
	 * XXX: a NULL arg means use the root vnode pointer (e.g. for
	 * miniroot)
	 */
	if (SCARG(uap, arg) == NULL) {
		vp = rootvp;		/* miniroot */
		if (vget(vp, LK_EXCLUSIVE, p)) {
			error = EBUSY;
			goto out;
		}
		if (SCARG(uap, cmd) == SWAP_ON &&
		    copystr("miniroot", userpath, sizeof userpath, &len))
			panic("swapctl: miniroot copy failed");
	} else {
		int	space;
		char	*where;

		if (SCARG(uap, cmd) == SWAP_ON) {
			if ((error = copyinstr(SCARG(uap, arg), userpath,
			    sizeof userpath, &len)))
				goto out;
			space = UIO_SYSSPACE;
			where = userpath;
		} else {
			space = UIO_USERSPACE;
			where = (char *)SCARG(uap, arg);
		}
		NDINIT(&nd, LOOKUP, FOLLOW|LOCKLEAF, space, where, p);
		if ((error = namei(&nd)))
			goto out;
		vp = nd.ni_vp;
	}
	/* note: "vp" is referenced and locked */

	error = 0;		/* assume no error */
	switch (SCARG(uap, cmd)) {

	case SWAP_DUMPDEV:
		if (vp->v_type != VBLK) {
			error = ENOTBLK;
			break;
		}
		dumpdev = vp->v_rdev;
		break;

	case SWAP_CTL:
		/*
		 * get new priority, remove old entry (if any) and then
		 * reinsert it in the correct place.  finally, prune out
		 * any empty priority structures.
		 */
		priority = SCARG(uap, misc);
		spp = malloc(sizeof *spp, M_VMSWAP, M_WAITOK);
		simple_lock(&uvm.swap_data_lock);
		if ((sdp = swaplist_find(vp, 1)) == NULL) {
			error = ENOENT;
		} else {
			swaplist_insert(sdp, spp, priority);
			swaplist_trim();
		}
		simple_unlock(&uvm.swap_data_lock);
		if (error)
			free(spp, M_VMSWAP);
		break;

	case SWAP_ON:

		/*
		 * check for duplicates.  if none found, then insert a
		 * dummy entry on the list to prevent someone else from
		 * trying to enable this device while we are working on
		 * it.
		 */

		priority = SCARG(uap, misc);
		simple_lock(&uvm.swap_data_lock);
		if ((sdp = swaplist_find(vp, 0)) != NULL) {
			error = EBUSY;
			simple_unlock(&uvm.swap_data_lock);
			break;
		}
		sdp = malloc(sizeof *sdp, M_VMSWAP, M_WAITOK|M_ZERO);
		spp = malloc(sizeof *spp, M_VMSWAP, M_WAITOK);
		sdp->swd_flags = SWF_FAKE;	/* placeholder only */
		sdp->swd_vp = vp;
		sdp->swd_dev = (vp->v_type == VBLK) ? vp->v_rdev : NODEV;

		/*
		 * XXX Is NFS elaboration necessary?
		 */
		if (vp->v_type == VREG) {
			sdp->swd_cred = crdup(p->p_ucred);
		}

		swaplist_insert(sdp, spp, priority);
		simple_unlock(&uvm.swap_data_lock);

		sdp->swd_pathlen = len;
		sdp->swd_path = malloc(sdp->swd_pathlen, M_VMSWAP, M_WAITOK);
		if (copystr(userpath, sdp->swd_path, sdp->swd_pathlen, 0) != 0)
			panic("swapctl: copystr");

		/*
		 * we've now got a FAKE placeholder in the swap list.
		 * now attempt to enable swap on it.  if we fail, undo
		 * what we've done and kill the fake entry we just inserted.
		 * if swap_on is a success, it will clear the SWF_FAKE flag
		 */

		if ((error = swap_on(p, sdp)) != 0) {
			simple_lock(&uvm.swap_data_lock);
			(void) swaplist_find(vp, 1);	/* kill fake entry */
			swaplist_trim();
			simple_unlock(&uvm.swap_data_lock);
			if (vp->v_type == VREG) {
				crfree(sdp->swd_cred);
			}
			free(sdp->swd_path, M_VMSWAP);
			free(sdp, M_VMSWAP);
			break;
		}
		break;

	case SWAP_OFF:
		simple_lock(&uvm.swap_data_lock);
		if ((sdp = swaplist_find(vp, 0)) == NULL) {
			simple_unlock(&uvm.swap_data_lock);
			error = ENXIO;
			break;
		}

		/*
		 * If a device isn't in use or enabled, we
		 * can't stop swapping from it (again).
		 */
		if ((sdp->swd_flags & (SWF_INUSE|SWF_ENABLE)) == 0) {
			simple_unlock(&uvm.swap_data_lock);
			error = EBUSY;
			break;
		}

		/*
		 * do the real work.
		 */
		error = swap_off(p, sdp);
		break;

	default:
		error = EINVAL;
	}

	/*
	 * done!  release the ref gained by namei() and unlock.
	 */
	vput(vp);

out:
	rw_exit_write(&swap_syscall_lock);

	UVMHIST_LOG(pdhist, "<- done!  error=%ld", error, 0, 0, 0);
	return (error);
}

/*
 * swap_on: attempt to enable a swapdev for swapping.  note that the
 *	swapdev is already on the global list, but disabled (marked
 *	SWF_FAKE).
 *
 * => we avoid the start of the disk (to protect disk labels)
 * => we also avoid the miniroot, if we are swapping to root.
 * => caller should leave uvm.swap_data_lock unlocked, we may lock it
 *	if needed.
 */
static int
swap_on(struct proc *p, struct swapdev *sdp)
{
	static int count = 0;	/* static */
	struct vnode *vp;
	int error, npages, nblocks, size;
	long addr;
	struct vattr va;
#if defined(NFSCLIENT)
	extern int (**nfsv2_vnodeop_p)(void *);
#endif /* defined(NFSCLIENT) */
	dev_t dev;
	UVMHIST_FUNC("swap_on"); UVMHIST_CALLED(pdhist);

	/*
	 * we want to enable swapping on sdp.  the swd_vp contains
	 * the vnode we want (locked and ref'd), and the swd_dev
	 * contains the dev_t of the file, if it is a block device.
	 */

	vp = sdp->swd_vp;
	dev = sdp->swd_dev;

	/*
	 * open the swap file (mostly useful for block device files to
	 * let device driver know what is up).
	 *
	 * we skip the open/close for root on swap because the root
	 * has already been opened when root was mounted (mountroot).
	 */
	if (vp != rootvp) {
		if ((error = VOP_OPEN(vp, FREAD|FWRITE, p->p_ucred, p)))
			return (error);
	}

	/* XXX this only works for block devices */
	UVMHIST_LOG(pdhist, "  dev=%ld, major(dev)=%ld", dev, major(dev), 0, 0);

	/*
	 * we now need to determine the size of the swap area.  for
	 * block specials we can call the d_psize function.
	 * for normal files, we must stat [get attrs].
	 *
	 * we put the result in nblocks.
	 * for normal files, we also want the filesystem block size
	 * (which we get with statfs).
	 */
	switch (vp->v_type) {
	case VBLK:
		if (bdevsw[major(dev)].d_psize == 0 ||
		    (nblocks = (*bdevsw[major(dev)].d_psize)(dev)) == -1) {
			error = ENXIO;
			goto bad;
		}
		break;

	case VREG:
		if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)))
			goto bad;
		nblocks = (int)btodb(va.va_size);
		if ((error =
		    VFS_STATFS(vp->v_mount, &vp->v_mount->mnt_stat, p)) != 0)
			goto bad;

		sdp->swd_bsize = vp->v_mount->mnt_stat.f_iosize;
		/*
		 * limit the max # of outstanding I/O requests we issue
		 * at any one time.  take it easy on NFS servers.
		 */
#if defined(NFSCLIENT)
		if (vp->v_op == nfsv2_vnodeop_p)
			sdp->swd_maxactive = 2; /* XXX */
		else
#endif /* defined(NFSCLIENT) */
			sdp->swd_maxactive = 8; /* XXX */
		break;

	default:
		error = ENXIO;
		goto bad;
	}

	/*
	 * save nblocks in a safe place and convert to pages.
	 */

	sdp->swd_nblks = nblocks;
	npages = dbtob((u_int64_t)nblocks) >> PAGE_SHIFT;

	/*
	 * for block special files, we want to make sure that we leave
	 * the disklabel and bootblocks alone, so we arrange to skip
	 * over them (arbitrarily choosing to skip PAGE_SIZE bytes).
	 * note that because of this the "size" can be less than the
	 * actual number of blocks on the device.
	 */
	if (vp->v_type == VBLK) {
		/* we use pages 1 to (size - 1) [inclusive] */
		size = npages - 1;
		addr = 1;
	} else {
		/* we use pages 0 to (size - 1) [inclusive] */
		size = npages;
		addr = 0;
	}

	/*
	 * make sure we have enough blocks for a reasonable sized swap
	 * area.  we want at least one page.
	 */

	if (size < 1) {
		UVMHIST_LOG(pdhist, "  size < 1!!", 0, 0, 0, 0);
		error = EINVAL;
		goto bad;
	}

	UVMHIST_LOG(pdhist, "  dev=%lx: size=%ld addr=0x%lx\n",
	    dev, size, addr, 0);

	/*
	 * now we need to allocate an extent to manage this swap device
	 */
	snprintf(sdp->swd_exname, sizeof(sdp->swd_exname), "swap0x%04x",
	    count++);

	/* note that extent_create's 3rd arg is inclusive, thus "- 1" */
	sdp->swd_ex = extent_create(sdp->swd_exname, 0, npages - 1, M_VMSWAP,
	    0, 0, EX_WAITOK);
	/* allocate the `saved' region from the extent so it won't be used */
	if (addr) {
		if (extent_alloc_region(sdp->swd_ex, 0, addr, EX_WAITOK))
			panic("disklabel region");
	}

	/*
	 * if the vnode we are swapping to is the root vnode
	 * (i.e. we are swapping to the miniroot) then we want
	 * to make sure we don't overwrite it.  do a statfs to
	 * find its size and skip over it.
	 */
	if (vp == rootvp) {
		struct mount *mp;
		struct statfs *sp;
		int rootblocks, rootpages;

		mp = rootvnode->v_mount;
		sp = &mp->mnt_stat;
		rootblocks = sp->f_blocks * btodb(sp->f_bsize);
		rootpages = round_page(dbtob((u_int64_t)rootblocks))
		    >> PAGE_SHIFT;
		if (rootpages >= size)
			panic("swap_on: miniroot larger than swap?");

		if (extent_alloc_region(sdp->swd_ex, addr,
		    rootpages, EX_WAITOK))
			panic("swap_on: unable to preserve miniroot");

		size -= rootpages;
		printf("Preserved %d pages of miniroot ", rootpages);
		printf("leaving %d pages of swap\n", size);
	}

	/*
	 * add a ref to vp to reflect usage as a swap device.
	 */
	vref(vp);

#ifdef UVM_SWAP_ENCRYPT
	if (uvm_doswapencrypt)
		uvm_swap_initcrypt(sdp, npages);
#endif
	/*
	 * now add the new swapdev to the drum and enable.
	 */
	simple_lock(&uvm.swap_data_lock);
	swapdrum_add(sdp, npages);
	sdp->swd_npages = size;
	sdp->swd_flags &= ~SWF_FAKE;	/* going live */
	sdp->swd_flags |= (SWF_INUSE|SWF_ENABLE);
	uvmexp.swpages += size;
	simple_unlock(&uvm.swap_data_lock);
	return (0);

bad:
	/*
	 * failure: close device if necessary and return error.
	 */
	if (vp != rootvp)
		(void)VOP_CLOSE(vp, FREAD|FWRITE, p->p_ucred, p);
	return (error);
}
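
/*
 * Unit check for the conversions in swap_on() above (assuming the
 * usual DEV_BSIZE of 512 and 4KB pages): dbtob(nblocks) = nblocks * 512,
 * so npages = nblocks / 8; e.g. a 64MB partition (131072 disk blocks)
 * yields npages = 16384, of which page 0 is withheld for the disklabel
 * on block devices.
 */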

/*
 * swap_off: stop swapping on swapdev
 *
 * => swap data should be locked, we will unlock.
 */
static int
swap_off(struct proc *p, struct swapdev *sdp)
{
	int error = 0;
	UVMHIST_FUNC("swap_off"); UVMHIST_CALLED(pdhist);
	UVMHIST_LOG(pdhist, "  dev=%lx", sdp->swd_dev, 0, 0, 0);

	/* disable the swap area being removed */
	sdp->swd_flags &= ~SWF_ENABLE;
	simple_unlock(&uvm.swap_data_lock);

	/*
	 * the idea is to find all the pages that are paged out to this
	 * device, and page them all in.  in uvm, swap-backed pageable
	 * memory can take two forms: aobjs and anons.  call the
	 * swapoff hook for each subsystem to bring in pages.
	 */

	if (uao_swap_off(sdp->swd_drumoffset,
	    sdp->swd_drumoffset + sdp->swd_drumsize) ||
	    amap_swap_off(sdp->swd_drumoffset,
	    sdp->swd_drumoffset + sdp->swd_drumsize)) {

		error = ENOMEM;
	} else if (sdp->swd_npginuse > sdp->swd_npgbad) {
		error = EBUSY;
	}

	if (error) {
		simple_lock(&uvm.swap_data_lock);
		sdp->swd_flags |= SWF_ENABLE;
		simple_unlock(&uvm.swap_data_lock);
		return (error);
	}

	/*
	 * done with the vnode and saved creds.
	 * drop our ref on the vnode before calling VOP_CLOSE()
	 * so that spec_close() can tell if this is the last close.
	 */
	if (sdp->swd_vp->v_type == VREG) {
		crfree(sdp->swd_cred);
	}
	vrele(sdp->swd_vp);
	if (sdp->swd_vp != rootvp) {
		(void) VOP_CLOSE(sdp->swd_vp, FREAD|FWRITE, p->p_ucred, p);
	}

	simple_lock(&uvm.swap_data_lock);
	uvmexp.swpages -= sdp->swd_npages;

	if (swaplist_find(sdp->swd_vp, 1) == NULL)
		panic("swap_off: swapdev not in list");
	swaplist_trim();

	/*
	 * free all resources!
	 */
	extent_free(swapmap, sdp->swd_drumoffset, sdp->swd_drumsize,
	    EX_WAITOK);
	extent_destroy(sdp->swd_ex);
	free(sdp, M_VMSWAP);
	simple_unlock(&uvm.swap_data_lock);
	return (0);
}

/*
 * /dev/drum interface and i/o functions
 */

/*
 * swstrategy: perform I/O on the drum
 *
 * => we must map the i/o request from the drum to the correct swapdev.
 */
void
swstrategy(struct buf *bp)
{
	struct swapdev *sdp;
	int s, pageno, bn;
	UVMHIST_FUNC("swstrategy"); UVMHIST_CALLED(pdhist);

	/*
	 * convert block number to swapdev.  note that swapdev can't
	 * be yanked out from under us because we are holding resources
	 * in it (i.e. the blocks we are doing I/O on).
	 */
	pageno = dbtob((u_int64_t)bp->b_blkno) >> PAGE_SHIFT;
	simple_lock(&uvm.swap_data_lock);
	sdp = swapdrum_getsdp(pageno);
	simple_unlock(&uvm.swap_data_lock);
	if (sdp == NULL) {
		bp->b_error = EINVAL;
		bp->b_flags |= B_ERROR;
		s = splbio();
		biodone(bp);
		splx(s);
		UVMHIST_LOG(pdhist, "  failed to get swap device", 0, 0, 0, 0);
		return;
	}

	/*
	 * convert drum page number to block number on this swapdev.
	 */

	pageno -= sdp->swd_drumoffset;	/* page # on swapdev */
	bn = btodb((u_int64_t)pageno << PAGE_SHIFT); /* convert to diskblock */

	UVMHIST_LOG(pdhist, "  %s: mapoff=%lx bn=0x%lx bcount=%ld",
	    ((bp->b_flags & B_READ) == 0) ? "write" : "read",
	    sdp->swd_drumoffset, bn, bp->b_bcount);

	/*
	 * for block devices we finish up here.
	 * for regular files we have to do more work which we delegate
	 * to sw_reg_strategy().
	 */

	switch (sdp->swd_vp->v_type) {
	default:
		panic("swstrategy: vnode type 0x%x", sdp->swd_vp->v_type);

	case VBLK:

		/*
		 * must convert "bp" from an I/O on /dev/drum to an I/O
		 * on the swapdev (sdp).
		 */
		s = splbio();
		buf_replacevnode(bp, sdp->swd_vp);

		bp->b_blkno = bn;
		splx(s);
		VOP_STRATEGY(bp);
		return;

	case VREG:
		/*
		 * delegate to sw_reg_strategy function.
		 */
		sw_reg_strategy(sdp, bp, bn);
		return;
	}
	/* NOTREACHED */
}
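
/*
 * Translation example for swstrategy() (assuming 512-byte disk blocks
 * and 4KB pages, i.e. 8 blocks per page): an i/o at drum block
 * b_blkno = 8200 is drum page 1025; on a swapdev with
 * swd_drumoffset = 1 that is device page 1024, so bn = 8192.
 */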

/*
 * sw_reg_strategy: handle swap i/o to regular files
 */
static void
sw_reg_strategy(struct swapdev *sdp, struct buf *bp, int bn)
{
	struct vnode	*vp;
	struct vndxfer	*vnx;
	daddr64_t	nbn;
	caddr_t		addr;
	off_t		byteoff;
	int		s, off, nra, error, sz, resid;
	UVMHIST_FUNC("sw_reg_strategy"); UVMHIST_CALLED(pdhist);

	/*
	 * allocate a vndxfer head for this transfer and point it to
	 * our buffer.
	 */
	getvndxfer(vnx);
	vnx->vx_flags = VX_BUSY;
	vnx->vx_error = 0;
	vnx->vx_pending = 0;
	vnx->vx_bp = bp;
	vnx->vx_sdp = sdp;

	/*
	 * setup for main loop where we read filesystem blocks into
	 * our buffer.
	 */
	error = 0;
	bp->b_resid = bp->b_bcount;	/* nothing transferred yet! */
	addr = bp->b_data;		/* current position in buffer */
	byteoff = dbtob((u_int64_t)bn);

	for (resid = bp->b_resid; resid; resid -= sz) {
		struct vndbuf	*nbp;

		/*
		 * translate byteoffset into block number.  return values:
		 *   vp = vnode of underlying device
		 *  nbn = new block number (on underlying vnode dev)
		 *  nra = num blocks we can read-ahead (excludes requested
		 *	block)
		 */
		nra = 0;
		error = VOP_BMAP(sdp->swd_vp, byteoff / sdp->swd_bsize,
		    &vp, &nbn, &nra);

		if (error == 0 && nbn == (daddr64_t)-1) {
			/*
			 * this used to just set error, but that doesn't
			 * do the right thing.  Instead, it causes random
			 * memory errors.  The panic() should remain until
			 * this condition doesn't destabilize the system.
			 */
#if 1
			panic("sw_reg_strategy: swap to sparse file");
#else
			error = EIO;	/* failure */
#endif
		}

		/*
		 * punt if there was an error or a hole in the file.
		 * we must wait for any i/o ops we have already started
		 * to finish before returning.
		 *
		 * XXX we could deal with holes here but it would be
		 * a hassle (in the write case).
		 */
		if (error) {
			s = splbio();
			vnx->vx_error = error;	/* pass error up */
			goto out;
		}

		/*
		 * compute the size ("sz") of this transfer (in bytes).
		 */
		off = byteoff % sdp->swd_bsize;
		sz = (1 + nra) * sdp->swd_bsize - off;
		if (sz > resid)
			sz = resid;

		UVMHIST_LOG(pdhist, "sw_reg_strategy: "
		    "vp %p/%p offset 0x%lx/0x%llx",
		    sdp->swd_vp, vp, (u_long)byteoff, nbn);

		/*
		 * now get a buf structure.  note that the vb_buf is
		 * at the front of the nbp structure so that you can
		 * cast pointers between the two structures easily.
		 */
		getvndbuf(nbp);
		nbp->vb_buf.b_flags    = bp->b_flags | B_CALL;
		nbp->vb_buf.b_bcount   = sz;
		nbp->vb_buf.b_bufsize  = sz;
		nbp->vb_buf.b_error    = 0;
		nbp->vb_buf.b_data     = addr;
		nbp->vb_buf.b_blkno    = nbn + btodb(off);
		nbp->vb_buf.b_proc     = bp->b_proc;
		nbp->vb_buf.b_iodone   = sw_reg_iodone;
		nbp->vb_buf.b_vp       = NULLVP;
		nbp->vb_buf.b_vnbufs.le_next = NOLIST;
		LIST_INIT(&nbp->vb_buf.b_dep);

		/*
		 * set b_dirtyoff/end and b_validoff/end.  this is
		 * required by the NFS client code (otherwise it will
		 * just discard our I/O request).
		 */
		if (bp->b_dirtyend == 0) {
			nbp->vb_buf.b_dirtyoff = 0;
			nbp->vb_buf.b_dirtyend = sz;
		} else {
			nbp->vb_buf.b_dirtyoff =
			    max(0, bp->b_dirtyoff - (bp->b_bcount-resid));
			nbp->vb_buf.b_dirtyend =
			    min(sz,
				max(0, bp->b_dirtyend - (bp->b_bcount-resid)));
		}
		if (bp->b_validend == 0) {
			nbp->vb_buf.b_validoff = 0;
			nbp->vb_buf.b_validend = sz;
		} else {
			nbp->vb_buf.b_validoff =
			    max(0, bp->b_validoff - (bp->b_bcount-resid));
			nbp->vb_buf.b_validend =
			    min(sz,
				max(0, bp->b_validend - (bp->b_bcount-resid)));
		}

		nbp->vb_xfer = vnx;	/* patch it back in to vnx */

		/*
		 * Just sort by block number
		 */
		nbp->vb_buf.b_cylinder = nbp->vb_buf.b_blkno;
		s = splbio();
		if (vnx->vx_error != 0) {
			putvndbuf(nbp);
			goto out;
		}
		vnx->vx_pending++;

		/* assoc new buffer with underlying vnode */
		bgetvp(vp, &nbp->vb_buf);

		/* sort it in and start I/O if we are not over our limit */
		disksort(&sdp->swd_tab, &nbp->vb_buf);
		sw_reg_start(sdp);
		splx(s);

		/*
		 * advance to the next I/O
		 */
		byteoff += sz;
		addr += sz;
	}

	s = splbio();

out: /* Arrive here at splbio */
	vnx->vx_flags &= ~VX_BUSY;
	if (vnx->vx_pending == 0) {
		if (vnx->vx_error != 0) {
			bp->b_error = vnx->vx_error;
			bp->b_flags |= B_ERROR;
		}
		putvndxfer(vnx);
		biodone(bp);
	}
	splx(s);
}
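
/*
 * Worked example for the "sz" computation above (illustrative numbers):
 * with swd_bsize = 64KB, byteoff = 100KB and no read-ahead (nra == 0),
 * off = 100KB % 64KB = 36KB and sz = 64KB - 36KB = 28KB, i.e. each
 * transfer is clipped at the next filesystem block boundary (and then
 * again at "resid" if less remains).
 */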

/*
 * sw_reg_start: start an I/O request on the requested swapdev
 *
 * => reqs are sorted by disksort (above)
 */
static void
sw_reg_start(struct swapdev *sdp)
{
	struct buf	*bp;
	UVMHIST_FUNC("sw_reg_start"); UVMHIST_CALLED(pdhist);

	/* recursion control */
	if ((sdp->swd_flags & SWF_BUSY) != 0)
		return;

	sdp->swd_flags |= SWF_BUSY;

	while (sdp->swd_tab.b_active < sdp->swd_maxactive) {
		bp = sdp->swd_tab.b_actf;
		if (bp == NULL)
			break;
		sdp->swd_tab.b_actf = bp->b_actf;
		sdp->swd_tab.b_active++;

		UVMHIST_LOG(pdhist,
		    "sw_reg_start:  bp %p vp %p blkno 0x%lx cnt 0x%lx",
		    bp, bp->b_vp, bp->b_blkno, bp->b_bcount);
		if ((bp->b_flags & B_READ) == 0)
			bp->b_vp->v_numoutput++;

		VOP_STRATEGY(bp);
	}
	sdp->swd_flags &= ~SWF_BUSY;
}

/*
 * sw_reg_iodone: one of our i/o's has completed and needs post-i/o cleanup
 *
 * => note that we can recover the vndbuf struct by casting the buf ptr
 */
static void
sw_reg_iodone(struct buf *bp)
{
	struct vndbuf *vbp = (struct vndbuf *) bp;
	struct vndxfer *vnx = vbp->vb_xfer;
	struct buf *pbp = vnx->vx_bp;		/* parent buffer */
	struct swapdev	*sdp = vnx->vx_sdp;
	int resid;
	UVMHIST_FUNC("sw_reg_iodone"); UVMHIST_CALLED(pdhist);

	UVMHIST_LOG(pdhist, "  vbp=%p vp=%p blkno=0x%lx addr=%p",
	    vbp, vbp->vb_buf.b_vp, vbp->vb_buf.b_blkno, vbp->vb_buf.b_data);
	UVMHIST_LOG(pdhist, "  cnt=%lx resid=%lx",
	    vbp->vb_buf.b_bcount, vbp->vb_buf.b_resid, 0, 0);

	splassert(IPL_BIO);

	resid = vbp->vb_buf.b_bcount - vbp->vb_buf.b_resid;
	pbp->b_resid -= resid;
	vnx->vx_pending--;

	if (vbp->vb_buf.b_error) {
		UVMHIST_LOG(pdhist, "  got error=%ld !",
		    vbp->vb_buf.b_error, 0, 0, 0);

		/* pass error upward */
		vnx->vx_error = vbp->vb_buf.b_error;
	}

	/*
	 * disassociate this buffer from the vnode (if any).
	 */
	if (vbp->vb_buf.b_vp != NULL) {
		brelvp(&vbp->vb_buf);
	}

	/*
	 * kill vbp structure
	 */
	putvndbuf(vbp);

	/*
	 * wrap up this transaction if it has run to completion or, in
	 * case of an error, when all auxiliary buffers have returned.
	 */
	if (vnx->vx_error != 0) {
		/* pass error upward */
		pbp->b_flags |= B_ERROR;
		pbp->b_error = vnx->vx_error;
		if ((vnx->vx_flags & VX_BUSY) == 0 && vnx->vx_pending == 0) {
			putvndxfer(vnx);
			biodone(pbp);
		}
	} else if (pbp->b_resid == 0) {
		KASSERT(vnx->vx_pending == 0);
		if ((vnx->vx_flags & VX_BUSY) == 0) {
			UVMHIST_LOG(pdhist, "  iodone error=%ld !",
			    pbp, vnx->vx_error, 0, 0);
			putvndxfer(vnx);
			biodone(pbp);
		}
	}

	/*
	 * done!  start next swapdev I/O if one is pending
	 */
	sdp->swd_tab.b_active--;
	sw_reg_start(sdp);
}
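
/*
 * Lifecycle summary of the vndxfer/vndbuf pair (a sketch of the code
 * above, not a separate API): sw_reg_strategy() takes one vndxfer,
 * marks it VX_BUSY, and queues one vndbuf per filesystem-block run,
 * bumping vx_pending each time; sw_reg_iodone() drops vx_pending as
 * each child completes, and the parent buf sees biodone() exactly
 * once, after VX_BUSY is clear and the transfer has finished or failed.
 */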


/*
 * uvm_swap_alloc: allocate space on swap
 *
 * => allocation is done "round robin" down the priority list, as we
 *	allocate in a priority we "rotate" the circle queue.
 * => space can be freed with uvm_swap_free
 * => we return the page slot number in /dev/drum (0 == invalid slot)
 * => we lock uvm.swap_data_lock
 * => XXXMRG: "LESSOK" INTERFACE NEEDED TO EXTENT SYSTEM
 */
int
uvm_swap_alloc(int *nslots, boolean_t lessok)
{
	struct swapdev *sdp;
	struct swappri *spp;
	u_long	result;
	UVMHIST_FUNC("uvm_swap_alloc"); UVMHIST_CALLED(pdhist);

	/*
	 * no swap devices configured yet?  definite failure.
	 */
	if (uvmexp.nswapdev < 1)
		return 0;

	/*
	 * lock data lock, convert slots into blocks, and enter loop
	 */
	simple_lock(&uvm.swap_data_lock);

ReTry:	/* XXXMRG */
	for (spp = LIST_FIRST(&swap_priority); spp != NULL;
	     spp = LIST_NEXT(spp, spi_swappri)) {
		for (sdp = CIRCLEQ_FIRST(&spp->spi_swapdev);
		     sdp != (void *)&spp->spi_swapdev;
		     sdp = CIRCLEQ_NEXT(sdp, swd_next)) {
			/* if it's not enabled, then we can't swap from it */
			if ((sdp->swd_flags & SWF_ENABLE) == 0)
				continue;
			if (sdp->swd_npginuse + *nslots > sdp->swd_npages)
				continue;
			if (extent_alloc(sdp->swd_ex, *nslots, EX_NOALIGN, 0,
			    EX_NOBOUNDARY, EX_MALLOCOK|EX_NOWAIT,
			    &result) != 0) {
				continue;
			}

			/*
			 * successful allocation!  now rotate the circleq.
			 */
			CIRCLEQ_REMOVE(&spp->spi_swapdev, sdp, swd_next);
			CIRCLEQ_INSERT_TAIL(&spp->spi_swapdev, sdp, swd_next);
			sdp->swd_npginuse += *nslots;
			uvmexp.swpginuse += *nslots;
			simple_unlock(&uvm.swap_data_lock);
			/* done!  return drum slot number */
			UVMHIST_LOG(pdhist,
			    "success!  returning %ld slots starting at %ld",
			    *nslots, result + sdp->swd_drumoffset, 0, 0);
			return (result + sdp->swd_drumoffset);
		}
	}

	/* XXXMRG: BEGIN HACK */
	if (*nslots > 1 && lessok) {
		*nslots = 1;
		goto ReTry;	/* XXXMRG: ugh!  extent should support this for us */
	}
	/* XXXMRG: END HACK */

	simple_unlock(&uvm.swap_data_lock);
	return 0;		/* failed */
}

/*
 * uvm_swap_markbad: keep track of swap ranges where we've had i/o errors
 *
 * => we lock uvm.swap_data_lock
 */
void
uvm_swap_markbad(int startslot, int nslots)
{
	struct swapdev *sdp;
	UVMHIST_FUNC("uvm_swap_markbad"); UVMHIST_CALLED(pdhist);

	simple_lock(&uvm.swap_data_lock);
	sdp = swapdrum_getsdp(startslot);
	if (sdp != NULL) {
		/*
		 * we just keep track of how many pages have been marked bad
		 * in this device, to make everything add up in swap_off().
		 * we assume here that the range of slots will all be within
		 * one swap device.
		 */
		sdp->swd_npgbad += nslots;
		UVMHIST_LOG(pdhist, "now %ld bad", sdp->swd_npgbad, 0, 0, 0);
	}
	simple_unlock(&uvm.swap_data_lock);
}

/*
 * uvm_swap_free: free swap slots
 *
 * => this can be all or part of an allocation made by uvm_swap_alloc
 * => we lock uvm.swap_data_lock
 */
void
uvm_swap_free(int startslot, int nslots)
{
	struct swapdev *sdp;
	UVMHIST_FUNC("uvm_swap_free"); UVMHIST_CALLED(pdhist);

	UVMHIST_LOG(pdhist, "freeing %ld slots starting at %ld", nslots,
	    startslot, 0, 0);

	/*
	 * ignore attempts to free the "bad" slot.
	 */

	if (startslot == SWSLOT_BAD) {
		return;
	}

	/*
	 * convert drum slot offset back to sdp, free the blocks
	 * in the extent, and return.  must hold pri lock to do
	 * lookup and access the extent.
	 */

	simple_lock(&uvm.swap_data_lock);
	sdp = swapdrum_getsdp(startslot);
	KASSERT(uvmexp.nswapdev >= 1);
	KASSERT(sdp != NULL);
	KASSERT(sdp->swd_npginuse >= nslots);
	if (extent_free(sdp->swd_ex, startslot - sdp->swd_drumoffset, nslots,
	    EX_MALLOCOK|EX_NOWAIT) != 0) {
		printf("warning: resource shortage: %d pages of swap lost\n",
		    nslots);
	}

	sdp->swd_npginuse -= nslots;
	uvmexp.swpginuse -= nslots;
#ifdef UVM_SWAP_ENCRYPT
	{
		int i;
		if (swap_encrypt_initialized) {
			/* Dereference keys */
			for (i = 0; i < nslots; i++)
				if (uvm_swap_needdecrypt(sdp, startslot + i)) {
					struct swap_key *key;

					key = SWD_KEY(sdp, startslot + i);
					if (key->refcount != 0)
						SWAP_KEY_PUT(sdp, key);
				}

			/* Mark range as not decrypt */
			uvm_swap_markdecrypt(sdp, startslot, nslots, 0);
		}
	}
#endif /* UVM_SWAP_ENCRYPT */
	simple_unlock(&uvm.swap_data_lock);
}
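
/*
 * Caller's-eye sketch of the allocator pair above (illustrative only):
 *
 *	int nslots = 4, slot;
 *
 *	slot = uvm_swap_alloc(&nslots, TRUE);	-- may shrink nslots to 1
 *	if (slot == 0)
 *		-- give up, no swap space
 *	-- do i/o on slots [slot, slot + nslots - 1] --
 *	uvm_swap_free(slot, nslots);
 */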

/*
 * uvm_swap_put: put any number of pages into a contig place on swap
 *
 * => can be sync or async
 * => XXXMRG: consider making it an inline or macro
 */
int
uvm_swap_put(int swslot, struct vm_page **ppsp, int npages, int flags)
{
	int	result;

	result = uvm_swap_io(ppsp, swslot, npages, B_WRITE |
	    ((flags & PGO_SYNCIO) ? 0 : B_ASYNC));

	return (result);
}

/*
 * uvm_swap_get: get a single page from swap
 *
 * => usually a sync op (from fault)
 * => XXXMRG: consider making it an inline or macro
 */
int
uvm_swap_get(struct vm_page *page, int swslot, int flags)
{
	int	result;

	uvmexp.nswget++;
	KASSERT(flags & PGO_SYNCIO);
	if (swslot == SWSLOT_BAD) {
		return VM_PAGER_ERROR;
	}

	/*
	 * this page is (about to be) no longer only in swap.
	 */
	simple_lock(&uvm.swap_data_lock);
	uvmexp.swpgonly--;
	simple_unlock(&uvm.swap_data_lock);

	result = uvm_swap_io(&page, swslot, 1, B_READ |
	    ((flags & PGO_SYNCIO) ? 0 : B_ASYNC));

	if (result != VM_PAGER_OK && result != VM_PAGER_PEND) {
		/*
		 * oops, the read failed so it really is still only in swap.
		 */
		simple_lock(&uvm.swap_data_lock);
		uvmexp.swpgonly++;
		simple_unlock(&uvm.swap_data_lock);
	}

	return (result);
}

/*
 * uvm_swap_io: do an i/o operation to swap
 */

static int
uvm_swap_io(struct vm_page **pps, int startslot, int npages, int flags)
{
	daddr64_t startblk;
	struct	buf *bp;
	vaddr_t	kva;
	int	result, s, mapinflags, pflag;
	boolean_t write, async;
#ifdef UVM_SWAP_ENCRYPT
	vaddr_t dstkva;
	struct vm_page *tpps[MAXBSIZE >> PAGE_SHIFT];
	struct swapdev *sdp;
	int	encrypt = 0;
#endif
	UVMHIST_FUNC("uvm_swap_io"); UVMHIST_CALLED(pdhist);

	UVMHIST_LOG(pdhist, "<- called, startslot=%ld, npages=%ld, flags=%ld",
	    startslot, npages, flags, 0);

	write = (flags & B_READ) == 0;
	async = (flags & B_ASYNC) != 0;

	/*
	 * convert starting drum slot to block number
	 */
	startblk = btodb((u_int64_t)startslot << PAGE_SHIFT);

	/*
	 * first, map the pages into the kernel (XXX: currently required
	 * by buffer system).
	 */
	mapinflags = !write ? UVMPAGER_MAPIN_READ : UVMPAGER_MAPIN_WRITE;
	if (!async)
		mapinflags |= UVMPAGER_MAPIN_WAITOK;
	kva = uvm_pagermapin(pps, npages, mapinflags);
	if (kva == 0)
		return (VM_PAGER_AGAIN);

#ifdef UVM_SWAP_ENCRYPT
	if (write) {
		/*
		 * Check if we need to do swap encryption on old pages.
		 * Later we need a different scheme, that swap encrypts
		 * all pages of a process that had at least one page swap
		 * encrypted.  Then we might not need to copy all pages
		 * in the cluster, and avoid the memory overhead in
		 * swapping.
		 */
		if (uvm_doswapencrypt)
			encrypt = 1;
	}

	if (swap_encrypt_initialized || encrypt) {
		/*
		 * we need to know the swap device that we are swapping to/from
		 * to see if the pages need to be marked for decryption or
		 * actually need to be decrypted.
		 * XXX - does this information stay the same over the whole
		 * execution of this function?
		 */
		simple_lock(&uvm.swap_data_lock);
		sdp = swapdrum_getsdp(startslot);
		simple_unlock(&uvm.swap_data_lock);
	}

	/*
	 * encrypt to swap
	 */
	if (write && encrypt) {
		int i, opages;
		caddr_t src, dst;
		struct swap_key *key;
		u_int64_t block;
		int swmapflags;

		/* We always need write access. */
		swmapflags = UVMPAGER_MAPIN_READ;
		if (!async)
			swmapflags |= UVMPAGER_MAPIN_WAITOK;

		if (!uvm_swap_allocpages(tpps, npages)) {
			uvm_pagermapout(kva, npages);
			return (VM_PAGER_AGAIN);
		}

		dstkva = uvm_pagermapin(tpps, npages, swmapflags);
		if (dstkva == 0) {
			uvm_pagermapout(kva, npages);
			uvm_swap_freepages(tpps, npages);
			return (VM_PAGER_AGAIN);
		}

		src = (caddr_t) kva;
		dst = (caddr_t) dstkva;
		block = startblk;
		for (i = 0; i < npages; i++) {
			key = SWD_KEY(sdp, startslot + i);
			SWAP_KEY_GET(sdp, key);	/* add reference */

			/* mark for async writes */
			atomic_setbits_int(&tpps[i]->pg_flags, PQ_ENCRYPT);
			swap_encrypt(key, src, dst, block, 1 << PAGE_SHIFT);
			src += 1 << PAGE_SHIFT;
			dst += 1 << PAGE_SHIFT;
			block += btodb(1 << PAGE_SHIFT);
		}

		uvm_pagermapout(kva, npages);

		/* dispose of pages we don't use anymore */
		opages = npages;
		uvm_pager_dropcluster(NULL, NULL, pps, &opages,
		    PGO_PDFREECLUST);

		kva = dstkva;
	}
#endif /* UVM_SWAP_ENCRYPT */

	/*
	 * now allocate a buf for the i/o.
	 * [make sure we don't put the pagedaemon to sleep...]
	 */
	s = splbio();
	pflag = (async || curproc == uvm.pagedaemon_proc) ? 0 : PR_WAITOK;
	bp = pool_get(&bufpool, pflag);
	splx(s);

	/*
	 * if we failed to get a swapbuf, return "try again"
	 */
	if (bp == NULL) {
#ifdef UVM_SWAP_ENCRYPT
		if (write && encrypt) {
			int i;

			/* swap encrypt needs cleanup */
			for (i = 0; i < npages; i++)
				SWAP_KEY_PUT(sdp, SWD_KEY(sdp, startslot + i));

			uvm_pagermapout(kva, npages);
			uvm_swap_freepages(tpps, npages);
		}
#endif
		return (VM_PAGER_AGAIN);
	}

#ifdef UVM_SWAP_ENCRYPT
	/*
	 * prevent ASYNC reads.
	 * for reads, uvm_swap_io is only called from uvm_swap_get, which
	 * assumes that all gets are SYNCIO.  Just make sure here.
	 * XXXARTUBC - might not be true anymore.
	 */
	if (!write) {
		flags &= ~B_ASYNC;
		async = 0;
	}
#endif
	/*
	 * fill in the bp.  we currently route our i/o through
	 * /dev/drum's vnode [swapdev_vp].
	 */
	bp->b_flags = B_BUSY | B_NOCACHE | B_RAW | (flags & (B_READ|B_ASYNC));
	bp->b_proc = &proc0;	/* XXX */
	bp->b_vnbufs.le_next = NOLIST;
	bp->b_data = (caddr_t)kva;
	bp->b_blkno = startblk;
	LIST_INIT(&bp->b_dep);
	s = splbio();
	bp->b_vp = NULL;
	buf_replacevnode(bp, swapdev_vp);
	splx(s);
	bp->b_bufsize = bp->b_bcount = npages << PAGE_SHIFT;

	/*
	 * for pageouts we must set "dirtyoff" [NFS client code needs it].
	 * and we bump v_numoutput (counter of number of active outputs).
	 */
	if (write) {
		bp->b_dirtyoff = 0;
		bp->b_dirtyend = npages << PAGE_SHIFT;
#ifdef UVM_SWAP_ENCRYPT
		/* mark the pages in the drum for decryption */
		if (swap_encrypt_initialized)
			uvm_swap_markdecrypt(sdp, startslot, npages, encrypt);
#endif
		s = splbio();
		swapdev_vp->v_numoutput++;
		splx(s);
	}

	/*
	 * for async ops we must set up the iodone handler.
	 */
	if (async) {
		bp->b_flags |= B_CALL | (curproc == uvm.pagedaemon_proc ?
		    B_PDAEMON : 0);
		bp->b_iodone = uvm_aio_biodone;
		UVMHIST_LOG(pdhist, "doing async!", 0, 0, 0, 0);
	}
	UVMHIST_LOG(pdhist,
	    "about to start io: data = %p blkno = 0x%lx, bcount = %ld",
	    bp->b_data, bp->b_blkno, bp->b_bcount, 0);

	/*
	 * now we start the I/O, and if async, return.
	 */
	VOP_STRATEGY(bp);
	if (async)
		return (VM_PAGER_PEND);

	/*
	 * must be sync i/o.  wait for it to finish
	 */
	(void) biowait(bp);
	result = (bp->b_flags & B_ERROR) ? VM_PAGER_ERROR : VM_PAGER_OK;

#ifdef UVM_SWAP_ENCRYPT
	/*
	 * decrypt swap
	 */
	if (swap_encrypt_initialized &&
	    (bp->b_flags & B_READ) && !(bp->b_flags & B_ERROR)) {
		int i;
		caddr_t data = bp->b_data;
		u_int64_t block = startblk;
		struct swap_key *key;

		for (i = 0; i < npages; i++) {
			/* Check if we need to decrypt */
			if (uvm_swap_needdecrypt(sdp, startslot + i)) {
				key = SWD_KEY(sdp, startslot + i);
				if (key->refcount == 0) {
					result = VM_PAGER_ERROR;
					break;
				}
				swap_decrypt(key, data, data, block,
				    1 << PAGE_SHIFT);
			}
			data += 1 << PAGE_SHIFT;
			block += btodb(1 << PAGE_SHIFT);
		}
	}
#endif
	/*
	 * kill the pager mapping
	 */
	uvm_pagermapout(kva, npages);

#ifdef UVM_SWAP_ENCRYPT
	/*
	 * No longer needed; free after encryption.
	 */
	if ((bp->b_flags & B_READ) == 0 && encrypt)
		uvm_swap_freepages(tpps, npages);
#endif
	/*
	 * now dispose of the buf
	 */
	s = splbio();
	if (bp->b_vp)
		brelvp(bp);

	if (write && bp->b_vp)
		vwakeup(bp->b_vp);
	pool_put(&bufpool, bp);
	splx(s);

	/*
	 * finally return.
	 */
	UVMHIST_LOG(pdhist, "<- done (sync)  result=%ld", result, 0, 0, 0);
	return (result);
}

static void
swapmount(void)
{
	struct swapdev *sdp;
	struct swappri *spp;
	struct vnode *vp;
	dev_t swap_dev = swdevt[0].sw_dev;

	/*
	 * No locking here since we happen to know that we will just be called
	 * once before any other process has forked.
	 */

	if (swap_dev == NODEV) {
		printf("swapmount: no device\n");
		return;
	}

	if (bdevvp(swap_dev, &vp)) {
		printf("swapmount: no device 2\n");
		return;
	}

	sdp = malloc(sizeof(*sdp), M_VMSWAP, M_WAITOK|M_ZERO);
	spp = malloc(sizeof(*spp), M_VMSWAP, M_WAITOK);

	sdp->swd_flags = SWF_FAKE;
	sdp->swd_dev = swap_dev;
	sdp->swd_vp = vp;
	swaplist_insert(sdp, spp, 0);
	sdp->swd_pathlen = strlen("swap_device") + 1;
	sdp->swd_path = malloc(sdp->swd_pathlen, M_VMSWAP, M_WAITOK);
	if (copystr("swap_device", sdp->swd_path, sdp->swd_pathlen, 0))
		panic("swapmount: copystr");

	if (swap_on(curproc, sdp)) {
		swaplist_find(vp, 1);
		swaplist_trim();
		vput(sdp->swd_vp);
		free(sdp->swd_path, M_VMSWAP);
		free(sdp, M_VMSWAP);
		return;
	}

	VOP_UNLOCK(vp, 0, curproc);
}