1 /* $OpenBSD: uvm_swap.c,v 1.104 2011/07/04 20:35:35 deraadt Exp $ */ 2 /* $NetBSD: uvm_swap.c,v 1.40 2000/11/17 11:39:39 mrg Exp $ */ 3 4 /* 5 * Copyright (c) 1995, 1996, 1997 Matthew R. Green 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. The name of the author may not be used to endorse or promote products 17 * derived from this software without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 24 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 25 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 26 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 * 31 * from: NetBSD: vm_swap.c,v 1.52 1997/12/02 13:47:37 pk Exp 32 * from: Id: uvm_swap.c,v 1.1.2.42 1998/02/02 20:38:06 chuck Exp 33 */ 34 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/buf.h> 38 #include <sys/conf.h> 39 #include <sys/proc.h> 40 #include <sys/namei.h> 41 #include <sys/disklabel.h> 42 #include <sys/errno.h> 43 #include <sys/kernel.h> 44 #include <sys/malloc.h> 45 #include <sys/vnode.h> 46 #include <sys/file.h> 47 #include <sys/extent.h> 48 #include <sys/mount.h> 49 #include <sys/pool.h> 50 #include <sys/syscallargs.h> 51 #include <sys/swap.h> 52 #include <sys/disk.h> 53 54 #include <uvm/uvm.h> 55 #ifdef UVM_SWAP_ENCRYPT 56 #include <dev/rndvar.h> 57 #include <sys/syslog.h> 58 #endif 59 60 #include <sys/specdev.h> 61 62 #include "vnd.h" 63 64 /* 65 * uvm_swap.c: manage configuration and i/o to swap space. 66 */ 67 68 /* 69 * swap space is managed in the following way: 70 * 71 * each swap partition or file is described by a "swapdev" structure. 72 * each "swapdev" structure contains a "swapent" structure which contains 73 * information that is passed up to the user (via system calls). 74 * 75 * each swap partition is assigned a "priority" (int) which controls 76 * swap partition usage. 77 * 78 * the system maintains a global data structure describing all swap 79 * partitions/files. there is a sorted LIST of "swappri" structures 80 * which describe "swapdev"'s at that priority. this LIST is headed 81 * by the "swap_priority" global var. each "swappri" contains a 82 * CIRCLEQ of "swapdev" structures at that priority. 83 * 84 * locking: 85 * - swap_syscall_lock (sleep lock): this lock serializes the swapctl 86 * system call and prevents the swap priority list from changing 87 * while we are in the middle of a system call (e.g. SWAP_STATS). 88 * - uvm.swap_data_lock (simple_lock): this lock protects all swap data 89 * structures including the priority list, the swapdev structures, 90 * and the swapmap extent. 91 * 92 * each swap device has the following info: 93 * - swap device in use (could be disabled, preventing future use) 94 * - swap enabled (allows new allocations on swap) 95 * - map info in /dev/drum 96 * - vnode pointer 97 * for swap files only: 98 * - block size 99 * - max byte count in buffer 100 * - buffer 101 * - credentials to use when doing i/o to file 102 * 103 * userland controls and configures swap with the swapctl(2) system call. 104 * the sys_swapctl performs the following operations: 105 * [1] SWAP_NSWAP: returns the number of swap devices currently configured 106 * [2] SWAP_STATS: given a pointer to an array of swapent structures 107 * (passed in via "arg") of a size passed in via "misc" ... we load 108 * the current swap config into the array. 109 * [3] SWAP_ON: given a pathname in arg (could be device or file) and a 110 * priority in "misc", start swapping on it. 111 * [4] SWAP_OFF: as SWAP_ON, but stops swapping to a device 112 * [5] SWAP_CTL: changes the priority of a swap device (new priority in 113 * "misc") 114 */ 115 116 /* 117 * swapdev: describes a single swap partition/file 118 * 119 * note the following should be true: 120 * swd_inuse <= swd_nblks [number of blocks in use is <= total blocks] 121 * swd_nblks <= swd_mapsize [because mapsize includes miniroot+disklabel] 122 */ 123 struct swapdev { 124 struct swapent swd_se; 125 #define swd_dev swd_se.se_dev /* device id */ 126 #define swd_flags swd_se.se_flags /* flags:inuse/enable/fake */ 127 #define swd_priority swd_se.se_priority /* our priority */ 128 #define swd_inuse swd_se.se_inuse /* our priority */ 129 #define swd_nblks swd_se.se_nblks /* our priority */ 130 char *swd_path; /* saved pathname of device */ 131 int swd_pathlen; /* length of pathname */ 132 int swd_npages; /* #pages we can use */ 133 int swd_npginuse; /* #pages in use */ 134 int swd_npgbad; /* #pages bad */ 135 int swd_drumoffset; /* page0 offset in drum */ 136 int swd_drumsize; /* #pages in drum */ 137 struct extent *swd_ex; /* extent for this swapdev */ 138 char swd_exname[12]; /* name of extent above */ 139 struct vnode *swd_vp; /* backing vnode */ 140 CIRCLEQ_ENTRY(swapdev) swd_next; /* priority circleq */ 141 142 int swd_bsize; /* blocksize (bytes) */ 143 int swd_maxactive; /* max active i/o reqs */ 144 int swd_active; /* # of active i/o reqs */ 145 struct bufq swd_bufq; 146 struct ucred *swd_cred; /* cred for file access */ 147 #ifdef UVM_SWAP_ENCRYPT 148 #define SWD_KEY_SHIFT 7 /* One key per 0.5 MByte */ 149 #define SWD_KEY(x,y) &((x)->swd_keys[((y) - (x)->swd_drumoffset) >> SWD_KEY_SHIFT]) 150 #define SWD_KEY_SIZE(x) (((x) + (1 << SWD_KEY_SHIFT) - 1) >> SWD_KEY_SHIFT) 151 152 #define SWD_DCRYPT_SHIFT 5 153 #define SWD_DCRYPT_BITS 32 154 #define SWD_DCRYPT_MASK (SWD_DCRYPT_BITS - 1) 155 #define SWD_DCRYPT_OFF(x) ((x) >> SWD_DCRYPT_SHIFT) 156 #define SWD_DCRYPT_BIT(x) ((x) & SWD_DCRYPT_MASK) 157 #define SWD_DCRYPT_SIZE(x) (SWD_DCRYPT_OFF((x) + SWD_DCRYPT_MASK) * sizeof(u_int32_t)) 158 u_int32_t *swd_decrypt; /* bitmap for decryption */ 159 struct swap_key *swd_keys; /* keys for different parts */ 160 #endif 161 }; 162 163 /* 164 * swap device priority entry; the list is kept sorted on `spi_priority'. 165 */ 166 struct swappri { 167 int spi_priority; /* priority */ 168 CIRCLEQ_HEAD(spi_swapdev, swapdev) spi_swapdev; 169 /* circleq of swapdevs at this priority */ 170 LIST_ENTRY(swappri) spi_swappri; /* global list of pri's */ 171 }; 172 173 /* 174 * The following two structures are used to keep track of data transfers 175 * on swap devices associated with regular files. 176 * NOTE: this code is more or less a copy of vnd.c; we use the same 177 * structure names here to ease porting.. 178 */ 179 struct vndxfer { 180 struct buf *vx_bp; /* Pointer to parent buffer */ 181 struct swapdev *vx_sdp; 182 int vx_error; 183 int vx_pending; /* # of pending aux buffers */ 184 int vx_flags; 185 #define VX_BUSY 1 186 #define VX_DEAD 2 187 }; 188 189 struct vndbuf { 190 struct buf vb_buf; 191 struct vndxfer *vb_xfer; 192 }; 193 194 195 /* 196 * We keep a of pool vndbuf's and vndxfer structures. 197 */ 198 struct pool vndxfer_pool; 199 struct pool vndbuf_pool; 200 201 #define getvndxfer(vnx) do { \ 202 int s = splbio(); \ 203 vnx = pool_get(&vndxfer_pool, PR_WAITOK); \ 204 splx(s); \ 205 } while (0) 206 207 #define putvndxfer(vnx) { \ 208 pool_put(&vndxfer_pool, (void *)(vnx)); \ 209 } 210 211 #define getvndbuf(vbp) do { \ 212 int s = splbio(); \ 213 vbp = pool_get(&vndbuf_pool, PR_WAITOK); \ 214 splx(s); \ 215 } while (0) 216 217 #define putvndbuf(vbp) { \ 218 pool_put(&vndbuf_pool, (void *)(vbp)); \ 219 } 220 221 /* 222 * local variables 223 */ 224 struct extent *swapmap; /* controls the mapping of /dev/drum */ 225 226 /* list of all active swap devices [by priority] */ 227 LIST_HEAD(swap_priority, swappri); 228 struct swap_priority swap_priority; 229 230 /* locks */ 231 struct rwlock swap_syscall_lock = RWLOCK_INITIALIZER("swplk"); 232 233 /* 234 * prototypes 235 */ 236 void swapdrum_add(struct swapdev *, int); 237 struct swapdev *swapdrum_getsdp(int); 238 239 struct swapdev *swaplist_find(struct vnode *, int); 240 void swaplist_insert(struct swapdev *, 241 struct swappri *, int); 242 void swaplist_trim(void); 243 244 int swap_on(struct proc *, struct swapdev *); 245 int swap_off(struct proc *, struct swapdev *); 246 247 void sw_reg_strategy(struct swapdev *, struct buf *, int); 248 void sw_reg_iodone(struct buf *); 249 void sw_reg_iodone_internal(void *, void *); 250 void sw_reg_start(struct swapdev *); 251 252 int uvm_swap_io(struct vm_page **, int, int, int); 253 254 void swapmount(void); 255 boolean_t uvm_swap_allocpages(struct vm_page **, int); 256 257 #ifdef UVM_SWAP_ENCRYPT 258 /* for swap encrypt */ 259 void uvm_swap_markdecrypt(struct swapdev *, int, int, int); 260 boolean_t uvm_swap_needdecrypt(struct swapdev *, int); 261 void uvm_swap_initcrypt(struct swapdev *, int); 262 #endif 263 264 /* 265 * uvm_swap_init: init the swap system data structures and locks 266 * 267 * => called at boot time from init_main.c after the filesystems 268 * are brought up (which happens after uvm_init()) 269 */ 270 void 271 uvm_swap_init(void) 272 { 273 /* 274 * first, init the swap list, its counter, and its lock. 275 * then get a handle on the vnode for /dev/drum by using 276 * the its dev_t number ("swapdev", from MD conf.c). 277 */ 278 279 LIST_INIT(&swap_priority); 280 uvmexp.nswapdev = 0; 281 simple_lock_init(&uvm.swap_data_lock); 282 283 if (!swapdev_vp && bdevvp(swapdev, &swapdev_vp)) 284 panic("uvm_swap_init: can't get vnode for swap device"); 285 286 /* 287 * create swap block resource map to map /dev/drum. the range 288 * from 1 to INT_MAX allows 2 gigablocks of swap space. note 289 * that block 0 is reserved (used to indicate an allocation 290 * failure, or no allocation). 291 */ 292 swapmap = extent_create("swapmap", 1, INT_MAX, 293 M_VMSWAP, 0, 0, EX_NOWAIT); 294 if (swapmap == 0) 295 panic("uvm_swap_init: extent_create failed"); 296 297 /* 298 * allocate pools for structures used for swapping to files. 299 */ 300 301 302 pool_init(&vndxfer_pool, sizeof(struct vndxfer), 0, 0, 0, "swp vnx", 303 NULL); 304 305 pool_init(&vndbuf_pool, sizeof(struct vndbuf), 0, 0, 0, "swp vnd", 306 NULL); 307 308 /* 309 * Setup the initial swap partition 310 */ 311 swapmount(); 312 313 /* 314 * done! 315 */ 316 } 317 318 #ifdef UVM_SWAP_ENCRYPT 319 void 320 uvm_swap_initcrypt_all(void) 321 { 322 struct swapdev *sdp; 323 struct swappri *spp; 324 int npages; 325 326 simple_lock(&uvm.swap_data_lock); 327 328 LIST_FOREACH(spp, &swap_priority, spi_swappri) { 329 CIRCLEQ_FOREACH(sdp, &spp->spi_swapdev, swd_next) 330 if (sdp->swd_decrypt == NULL) { 331 npages = dbtob((uint64_t)sdp->swd_nblks) >> 332 PAGE_SHIFT; 333 uvm_swap_initcrypt(sdp, npages); 334 } 335 } 336 simple_unlock(&uvm.swap_data_lock); 337 } 338 339 void 340 uvm_swap_initcrypt(struct swapdev *sdp, int npages) 341 { 342 /* 343 * keep information if a page needs to be decrypted when we get it 344 * from the swap device. 345 * We cannot chance a malloc later, if we are doing ASYNC puts, 346 * we may not call malloc with M_WAITOK. This consumes only 347 * 8KB memory for a 256MB swap partition. 348 */ 349 sdp->swd_decrypt = malloc(SWD_DCRYPT_SIZE(npages), M_VMSWAP, 350 M_WAITOK|M_ZERO); 351 sdp->swd_keys = malloc(SWD_KEY_SIZE(npages) * sizeof(struct swap_key), 352 M_VMSWAP, M_WAITOK|M_ZERO); 353 } 354 355 #endif /* UVM_SWAP_ENCRYPT */ 356 357 boolean_t 358 uvm_swap_allocpages(struct vm_page **pps, int npages) 359 { 360 struct pglist pgl; 361 int i; 362 boolean_t fail; 363 364 /* Estimate if we will succeed */ 365 uvm_lock_fpageq(); 366 367 fail = uvmexp.free - npages < uvmexp.reserve_kernel; 368 369 uvm_unlock_fpageq(); 370 371 if (fail) 372 return FALSE; 373 374 TAILQ_INIT(&pgl); 375 if (uvm_pglistalloc(npages * PAGE_SIZE, dma_constraint.ucr_low, 376 dma_constraint.ucr_high, 0, 0, &pgl, npages, UVM_PLA_NOWAIT)) 377 return FALSE; 378 379 for (i = 0; i < npages; i++) { 380 pps[i] = TAILQ_FIRST(&pgl); 381 /* *sigh* */ 382 atomic_setbits_int(&pps[i]->pg_flags, PG_BUSY); 383 TAILQ_REMOVE(&pgl, pps[i], pageq); 384 } 385 386 return TRUE; 387 } 388 389 void 390 uvm_swap_freepages(struct vm_page **pps, int npages) 391 { 392 int i; 393 394 uvm_lock_pageq(); 395 for (i = 0; i < npages; i++) 396 uvm_pagefree(pps[i]); 397 uvm_unlock_pageq(); 398 } 399 400 #ifdef UVM_SWAP_ENCRYPT 401 /* 402 * Mark pages on the swap device for later decryption 403 */ 404 405 void 406 uvm_swap_markdecrypt(struct swapdev *sdp, int startslot, int npages, 407 int decrypt) 408 { 409 int pagestart, i; 410 int off, bit; 411 412 if (!sdp) 413 return; 414 415 pagestart = startslot - sdp->swd_drumoffset; 416 for (i = 0; i < npages; i++, pagestart++) { 417 off = SWD_DCRYPT_OFF(pagestart); 418 bit = SWD_DCRYPT_BIT(pagestart); 419 if (decrypt) 420 /* pages read need decryption */ 421 sdp->swd_decrypt[off] |= 1 << bit; 422 else 423 /* pages read do not need decryption */ 424 sdp->swd_decrypt[off] &= ~(1 << bit); 425 } 426 } 427 428 /* 429 * Check if the page that we got from disk needs to be decrypted 430 */ 431 432 boolean_t 433 uvm_swap_needdecrypt(struct swapdev *sdp, int off) 434 { 435 if (!sdp) 436 return FALSE; 437 438 off -= sdp->swd_drumoffset; 439 return sdp->swd_decrypt[SWD_DCRYPT_OFF(off)] & (1 << SWD_DCRYPT_BIT(off)) ? 440 TRUE : FALSE; 441 } 442 443 void 444 uvm_swap_finicrypt_all(void) 445 { 446 struct swapdev *sdp; 447 struct swappri *spp; 448 struct swap_key *key; 449 unsigned int nkeys; 450 451 simple_lock(&uvm.swap_data_lock); 452 453 LIST_FOREACH(spp, &swap_priority, spi_swappri) { 454 CIRCLEQ_FOREACH(sdp, &spp->spi_swapdev, swd_next) { 455 if (sdp->swd_decrypt == NULL) 456 continue; 457 458 nkeys = dbtob((uint64_t)sdp->swd_nblks) >> PAGE_SHIFT; 459 key = sdp->swd_keys + (SWD_KEY_SIZE(nkeys) - 1); 460 do { 461 if (key->refcount != 0) 462 swap_key_delete(key); 463 } while (key-- != sdp->swd_keys); 464 } 465 } 466 simple_unlock(&uvm.swap_data_lock); 467 } 468 #endif /* UVM_SWAP_ENCRYPT */ 469 470 /* 471 * swaplist functions: functions that operate on the list of swap 472 * devices on the system. 473 */ 474 475 /* 476 * swaplist_insert: insert swap device "sdp" into the global list 477 * 478 * => caller must hold both swap_syscall_lock and uvm.swap_data_lock 479 * => caller must provide a newly malloc'd swappri structure (we will 480 * FREE it if we don't need it... this it to prevent malloc blocking 481 * here while adding swap) 482 */ 483 void 484 swaplist_insert(struct swapdev *sdp, struct swappri *newspp, int priority) 485 { 486 struct swappri *spp, *pspp; 487 488 /* 489 * find entry at or after which to insert the new device. 490 */ 491 for (pspp = NULL, spp = LIST_FIRST(&swap_priority); spp != NULL; 492 spp = LIST_NEXT(spp, spi_swappri)) { 493 if (priority <= spp->spi_priority) 494 break; 495 pspp = spp; 496 } 497 498 /* 499 * new priority? 500 */ 501 if (spp == NULL || spp->spi_priority != priority) { 502 spp = newspp; /* use newspp! */ 503 504 spp->spi_priority = priority; 505 CIRCLEQ_INIT(&spp->spi_swapdev); 506 507 if (pspp) 508 LIST_INSERT_AFTER(pspp, spp, spi_swappri); 509 else 510 LIST_INSERT_HEAD(&swap_priority, spp, spi_swappri); 511 } else { 512 /* we don't need a new priority structure, free it */ 513 free(newspp, M_VMSWAP); 514 } 515 516 /* 517 * priority found (or created). now insert on the priority's 518 * circleq list and bump the total number of swapdevs. 519 */ 520 sdp->swd_priority = priority; 521 CIRCLEQ_INSERT_TAIL(&spp->spi_swapdev, sdp, swd_next); 522 uvmexp.nswapdev++; 523 } 524 525 /* 526 * swaplist_find: find and optionally remove a swap device from the 527 * global list. 528 * 529 * => caller must hold both swap_syscall_lock and uvm.swap_data_lock 530 * => we return the swapdev we found (and removed) 531 */ 532 struct swapdev * 533 swaplist_find(struct vnode *vp, boolean_t remove) 534 { 535 struct swapdev *sdp; 536 struct swappri *spp; 537 538 /* 539 * search the lists for the requested vp 540 */ 541 for (spp = LIST_FIRST(&swap_priority); spp != NULL; 542 spp = LIST_NEXT(spp, spi_swappri)) { 543 for (sdp = CIRCLEQ_FIRST(&spp->spi_swapdev); 544 sdp != (void *)&spp->spi_swapdev; 545 sdp = CIRCLEQ_NEXT(sdp, swd_next)) 546 if (sdp->swd_vp == vp) { 547 if (remove) { 548 CIRCLEQ_REMOVE(&spp->spi_swapdev, 549 sdp, swd_next); 550 uvmexp.nswapdev--; 551 } 552 return(sdp); 553 } 554 } 555 return (NULL); 556 } 557 558 559 /* 560 * swaplist_trim: scan priority list for empty priority entries and kill 561 * them. 562 * 563 * => caller must hold both swap_syscall_lock and uvm.swap_data_lock 564 */ 565 void 566 swaplist_trim(void) 567 { 568 struct swappri *spp, *nextspp; 569 570 for (spp = LIST_FIRST(&swap_priority); spp != NULL; spp = nextspp) { 571 nextspp = LIST_NEXT(spp, spi_swappri); 572 if (CIRCLEQ_FIRST(&spp->spi_swapdev) != 573 (void *)&spp->spi_swapdev) 574 continue; 575 LIST_REMOVE(spp, spi_swappri); 576 free(spp, M_VMSWAP); 577 } 578 } 579 580 /* 581 * swapdrum_add: add a "swapdev"'s blocks into /dev/drum's area. 582 * 583 * => caller must hold swap_syscall_lock 584 * => uvm.swap_data_lock should be unlocked (we may sleep) 585 */ 586 void 587 swapdrum_add(struct swapdev *sdp, int npages) 588 { 589 u_long result; 590 591 if (extent_alloc(swapmap, npages, EX_NOALIGN, 0, EX_NOBOUNDARY, 592 EX_WAITOK, &result)) 593 panic("swapdrum_add"); 594 595 sdp->swd_drumoffset = result; 596 sdp->swd_drumsize = npages; 597 } 598 599 /* 600 * swapdrum_getsdp: given a page offset in /dev/drum, convert it back 601 * to the "swapdev" that maps that section of the drum. 602 * 603 * => each swapdev takes one big contig chunk of the drum 604 * => caller must hold uvm.swap_data_lock 605 */ 606 struct swapdev * 607 swapdrum_getsdp(int pgno) 608 { 609 struct swapdev *sdp; 610 struct swappri *spp; 611 612 for (spp = LIST_FIRST(&swap_priority); spp != NULL; 613 spp = LIST_NEXT(spp, spi_swappri)) 614 for (sdp = CIRCLEQ_FIRST(&spp->spi_swapdev); 615 sdp != (void *)&spp->spi_swapdev; 616 sdp = CIRCLEQ_NEXT(sdp, swd_next)) 617 if (pgno >= sdp->swd_drumoffset && 618 pgno < (sdp->swd_drumoffset + sdp->swd_drumsize)) { 619 return sdp; 620 } 621 return NULL; 622 } 623 624 625 /* 626 * sys_swapctl: main entry point for swapctl(2) system call 627 * [with two helper functions: swap_on and swap_off] 628 */ 629 int 630 sys_swapctl(struct proc *p, void *v, register_t *retval) 631 { 632 struct sys_swapctl_args /* { 633 syscallarg(int) cmd; 634 syscallarg(void *) arg; 635 syscallarg(int) misc; 636 } */ *uap = (struct sys_swapctl_args *)v; 637 struct vnode *vp; 638 struct nameidata nd; 639 struct swappri *spp; 640 struct swapdev *sdp; 641 struct swapent *sep; 642 char userpath[MAXPATHLEN]; 643 size_t len; 644 int count, error, misc; 645 int priority; 646 647 misc = SCARG(uap, misc); 648 649 /* 650 * ensure serialized syscall access by grabbing the swap_syscall_lock 651 */ 652 rw_enter_write(&swap_syscall_lock); 653 654 /* 655 * we handle the non-priv NSWAP and STATS request first. 656 * 657 * SWAP_NSWAP: return number of config'd swap devices 658 * [can also be obtained with uvmexp sysctl] 659 */ 660 if (SCARG(uap, cmd) == SWAP_NSWAP) { 661 *retval = uvmexp.nswapdev; 662 error = 0; 663 goto out; 664 } 665 666 /* 667 * SWAP_STATS: get stats on current # of configured swap devs 668 * 669 * note that the swap_priority list can't change as long 670 * as we are holding the swap_syscall_lock. we don't want 671 * to grab the uvm.swap_data_lock because we may fault&sleep during 672 * copyout() and we don't want to be holding that lock then! 673 */ 674 if (SCARG(uap, cmd) == SWAP_STATS) { 675 sep = (struct swapent *)SCARG(uap, arg); 676 count = 0; 677 678 for (spp = LIST_FIRST(&swap_priority); spp != NULL; 679 spp = LIST_NEXT(spp, spi_swappri)) { 680 for (sdp = CIRCLEQ_FIRST(&spp->spi_swapdev); 681 sdp != (void *)&spp->spi_swapdev && misc-- > 0; 682 sdp = CIRCLEQ_NEXT(sdp, swd_next)) { 683 sdp->swd_inuse = 684 btodb((u_int64_t)sdp->swd_npginuse << 685 PAGE_SHIFT); 686 error = copyout(&sdp->swd_se, sep, 687 sizeof(struct swapent)); 688 689 /* now copy out the path if necessary */ 690 if (error == 0) 691 error = copyout(sdp->swd_path, 692 &sep->se_path, sdp->swd_pathlen); 693 694 if (error) 695 goto out; 696 count++; 697 sep++; 698 } 699 } 700 701 *retval = count; 702 error = 0; 703 goto out; 704 } 705 706 /* 707 * all other requests require superuser privs. verify. 708 */ 709 if ((error = suser(p, 0))) 710 goto out; 711 712 /* 713 * at this point we expect a path name in arg. we will 714 * use namei() to gain a vnode reference (vref), and lock 715 * the vnode (VOP_LOCK). 716 * 717 * XXX: a NULL arg means use the root vnode pointer (e.g. for 718 * miniroot) 719 */ 720 if (SCARG(uap, arg) == NULL) { 721 vp = rootvp; /* miniroot */ 722 if (vget(vp, LK_EXCLUSIVE, p)) { 723 error = EBUSY; 724 goto out; 725 } 726 if (SCARG(uap, cmd) == SWAP_ON && 727 copystr("miniroot", userpath, sizeof userpath, &len)) 728 panic("swapctl: miniroot copy failed"); 729 } else { 730 error = copyinstr(SCARG(uap, arg), userpath, 731 sizeof(userpath), &len); 732 if (error) 733 goto out; 734 disk_map(userpath, userpath, sizeof(userpath), 735 DM_OPENBLCK); 736 NDINIT(&nd, LOOKUP, FOLLOW|LOCKLEAF, UIO_SYSSPACE, userpath, p); 737 if ((error = namei(&nd))) 738 goto out; 739 vp = nd.ni_vp; 740 } 741 /* note: "vp" is referenced and locked */ 742 743 error = 0; /* assume no error */ 744 switch(SCARG(uap, cmd)) { 745 746 case SWAP_DUMPDEV: 747 if (vp->v_type != VBLK) { 748 error = ENOTBLK; 749 break; 750 } 751 dumpdev = vp->v_rdev; 752 break; 753 754 case SWAP_CTL: 755 /* 756 * get new priority, remove old entry (if any) and then 757 * reinsert it in the correct place. finally, prune out 758 * any empty priority structures. 759 */ 760 priority = SCARG(uap, misc); 761 spp = malloc(sizeof *spp, M_VMSWAP, M_WAITOK); 762 simple_lock(&uvm.swap_data_lock); 763 if ((sdp = swaplist_find(vp, 1)) == NULL) { 764 error = ENOENT; 765 } else { 766 swaplist_insert(sdp, spp, priority); 767 swaplist_trim(); 768 } 769 simple_unlock(&uvm.swap_data_lock); 770 if (error) 771 free(spp, M_VMSWAP); 772 break; 773 774 case SWAP_ON: 775 776 /* 777 * check for duplicates. if none found, then insert a 778 * dummy entry on the list to prevent someone else from 779 * trying to enable this device while we are working on 780 * it. 781 */ 782 783 priority = SCARG(uap, misc); 784 simple_lock(&uvm.swap_data_lock); 785 if ((sdp = swaplist_find(vp, 0)) != NULL) { 786 error = EBUSY; 787 simple_unlock(&uvm.swap_data_lock); 788 break; 789 } 790 sdp = malloc(sizeof *sdp, M_VMSWAP, M_WAITOK|M_ZERO); 791 spp = malloc(sizeof *spp, M_VMSWAP, M_WAITOK); 792 sdp->swd_flags = SWF_FAKE; /* placeholder only */ 793 sdp->swd_vp = vp; 794 sdp->swd_dev = (vp->v_type == VBLK) ? vp->v_rdev : NODEV; 795 796 /* 797 * XXX Is NFS elaboration necessary? 798 */ 799 if (vp->v_type == VREG) { 800 sdp->swd_cred = crdup(p->p_ucred); 801 } 802 803 swaplist_insert(sdp, spp, priority); 804 simple_unlock(&uvm.swap_data_lock); 805 806 sdp->swd_pathlen = len; 807 sdp->swd_path = malloc(sdp->swd_pathlen, M_VMSWAP, M_WAITOK); 808 if (copystr(userpath, sdp->swd_path, sdp->swd_pathlen, 0) != 0) 809 panic("swapctl: copystr"); 810 811 /* 812 * we've now got a FAKE placeholder in the swap list. 813 * now attempt to enable swap on it. if we fail, undo 814 * what we've done and kill the fake entry we just inserted. 815 * if swap_on is a success, it will clear the SWF_FAKE flag 816 */ 817 818 if ((error = swap_on(p, sdp)) != 0) { 819 simple_lock(&uvm.swap_data_lock); 820 (void) swaplist_find(vp, 1); /* kill fake entry */ 821 swaplist_trim(); 822 simple_unlock(&uvm.swap_data_lock); 823 if (vp->v_type == VREG) { 824 crfree(sdp->swd_cred); 825 } 826 free(sdp->swd_path, M_VMSWAP); 827 free(sdp, M_VMSWAP); 828 break; 829 } 830 break; 831 832 case SWAP_OFF: 833 simple_lock(&uvm.swap_data_lock); 834 if ((sdp = swaplist_find(vp, 0)) == NULL) { 835 simple_unlock(&uvm.swap_data_lock); 836 error = ENXIO; 837 break; 838 } 839 840 /* 841 * If a device isn't in use or enabled, we 842 * can't stop swapping from it (again). 843 */ 844 if ((sdp->swd_flags & (SWF_INUSE|SWF_ENABLE)) == 0) { 845 simple_unlock(&uvm.swap_data_lock); 846 error = EBUSY; 847 break; 848 } 849 850 /* 851 * do the real work. 852 */ 853 error = swap_off(p, sdp); 854 break; 855 856 default: 857 error = EINVAL; 858 } 859 860 /* 861 * done! release the ref gained by namei() and unlock. 862 */ 863 vput(vp); 864 865 out: 866 rw_exit_write(&swap_syscall_lock); 867 868 return (error); 869 } 870 871 /* 872 * swap_on: attempt to enable a swapdev for swapping. note that the 873 * swapdev is already on the global list, but disabled (marked 874 * SWF_FAKE). 875 * 876 * => we avoid the start of the disk (to protect disk labels) 877 * => we also avoid the miniroot, if we are swapping to root. 878 * => caller should leave uvm.swap_data_lock unlocked, we may lock it 879 * if needed. 880 */ 881 int 882 swap_on(struct proc *p, struct swapdev *sdp) 883 { 884 static int count = 0; /* static */ 885 struct vnode *vp; 886 int error, npages, nblocks, size; 887 long addr; 888 struct vattr va; 889 #if defined(NFSCLIENT) 890 extern struct vops nfs_vops; 891 #endif /* defined(NFSCLIENT) */ 892 dev_t dev; 893 894 /* 895 * we want to enable swapping on sdp. the swd_vp contains 896 * the vnode we want (locked and ref'd), and the swd_dev 897 * contains the dev_t of the file, if it a block device. 898 */ 899 900 vp = sdp->swd_vp; 901 dev = sdp->swd_dev; 902 903 #if NVND > 0 904 /* no swapping to vnds. */ 905 if (bdevsw[major(dev)].d_strategy == vndstrategy) 906 return (EOPNOTSUPP); 907 #endif 908 909 /* 910 * open the swap file (mostly useful for block device files to 911 * let device driver know what is up). 912 * 913 * we skip the open/close for root on swap because the root 914 * has already been opened when root was mounted (mountroot). 915 */ 916 if (vp != rootvp) { 917 if ((error = VOP_OPEN(vp, FREAD|FWRITE, p->p_ucred, p))) 918 return (error); 919 } 920 921 /* XXX this only works for block devices */ 922 /* 923 * we now need to determine the size of the swap area. for 924 * block specials we can call the d_psize function. 925 * for normal files, we must stat [get attrs]. 926 * 927 * we put the result in nblks. 928 * for normal files, we also want the filesystem block size 929 * (which we get with statfs). 930 */ 931 switch (vp->v_type) { 932 case VBLK: 933 if (bdevsw[major(dev)].d_psize == 0 || 934 (nblocks = (*bdevsw[major(dev)].d_psize)(dev)) == -1) { 935 error = ENXIO; 936 goto bad; 937 } 938 break; 939 940 case VREG: 941 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p))) 942 goto bad; 943 nblocks = (int)btodb(va.va_size); 944 if ((error = 945 VFS_STATFS(vp->v_mount, &vp->v_mount->mnt_stat, p)) != 0) 946 goto bad; 947 948 sdp->swd_bsize = vp->v_mount->mnt_stat.f_iosize; 949 /* 950 * limit the max # of outstanding I/O requests we issue 951 * at any one time. take it easy on NFS servers. 952 */ 953 #if defined(NFSCLIENT) 954 if (vp->v_op == &nfs_vops) 955 sdp->swd_maxactive = 2; /* XXX */ 956 else 957 #endif /* defined(NFSCLIENT) */ 958 sdp->swd_maxactive = 8; /* XXX */ 959 bufq_init(&sdp->swd_bufq, BUFQ_FIFO); 960 break; 961 962 default: 963 error = ENXIO; 964 goto bad; 965 } 966 967 /* 968 * save nblocks in a safe place and convert to pages. 969 */ 970 971 sdp->swd_nblks = nblocks; 972 npages = dbtob((u_int64_t)nblocks) >> PAGE_SHIFT; 973 974 /* 975 * for block special files, we want to make sure that leave 976 * the disklabel and bootblocks alone, so we arrange to skip 977 * over them (arbitrarily choosing to skip PAGE_SIZE bytes). 978 * note that because of this the "size" can be less than the 979 * actual number of blocks on the device. 980 */ 981 if (vp->v_type == VBLK) { 982 /* we use pages 1 to (size - 1) [inclusive] */ 983 size = npages - 1; 984 addr = 1; 985 } else { 986 /* we use pages 0 to (size - 1) [inclusive] */ 987 size = npages; 988 addr = 0; 989 } 990 991 /* 992 * make sure we have enough blocks for a reasonable sized swap 993 * area. we want at least one page. 994 */ 995 996 if (size < 1) { 997 error = EINVAL; 998 goto bad; 999 } 1000 1001 /* 1002 * now we need to allocate an extent to manage this swap device 1003 */ 1004 snprintf(sdp->swd_exname, sizeof(sdp->swd_exname), "swap0x%04x", 1005 count++); 1006 1007 /* note that extent_create's 3rd arg is inclusive, thus "- 1" */ 1008 sdp->swd_ex = extent_create(sdp->swd_exname, 0, npages - 1, M_VMSWAP, 1009 0, 0, EX_WAITOK); 1010 /* allocate the `saved' region from the extent so it won't be used */ 1011 if (addr) { 1012 if (extent_alloc_region(sdp->swd_ex, 0, addr, EX_WAITOK)) 1013 panic("disklabel region"); 1014 } 1015 1016 /* 1017 * if the vnode we are swapping to is the root vnode 1018 * (i.e. we are swapping to the miniroot) then we want 1019 * to make sure we don't overwrite it. do a statfs to 1020 * find its size and skip over it. 1021 */ 1022 if (vp == rootvp) { 1023 struct mount *mp; 1024 struct statfs *sp; 1025 int rootblocks, rootpages; 1026 1027 mp = rootvnode->v_mount; 1028 sp = &mp->mnt_stat; 1029 rootblocks = sp->f_blocks * btodb(sp->f_bsize); 1030 rootpages = round_page(dbtob((u_int64_t)rootblocks)) 1031 >> PAGE_SHIFT; 1032 if (rootpages >= size) 1033 panic("swap_on: miniroot larger than swap?"); 1034 1035 if (extent_alloc_region(sdp->swd_ex, addr, 1036 rootpages, EX_WAITOK)) 1037 panic("swap_on: unable to preserve miniroot"); 1038 1039 size -= rootpages; 1040 printf("Preserved %d pages of miniroot ", rootpages); 1041 printf("leaving %d pages of swap\n", size); 1042 } 1043 1044 /* 1045 * add a ref to vp to reflect usage as a swap device. 1046 */ 1047 vref(vp); 1048 1049 #ifdef UVM_SWAP_ENCRYPT 1050 if (uvm_doswapencrypt) 1051 uvm_swap_initcrypt(sdp, npages); 1052 #endif 1053 /* 1054 * now add the new swapdev to the drum and enable. 1055 */ 1056 simple_lock(&uvm.swap_data_lock); 1057 swapdrum_add(sdp, npages); 1058 sdp->swd_npages = size; 1059 sdp->swd_flags &= ~SWF_FAKE; /* going live */ 1060 sdp->swd_flags |= (SWF_INUSE|SWF_ENABLE); 1061 uvmexp.swpages += size; 1062 simple_unlock(&uvm.swap_data_lock); 1063 return (0); 1064 1065 bad: 1066 /* 1067 * failure: close device if necessary and return error. 1068 */ 1069 if (vp != rootvp) 1070 (void)VOP_CLOSE(vp, FREAD|FWRITE, p->p_ucred, p); 1071 return (error); 1072 } 1073 1074 /* 1075 * swap_off: stop swapping on swapdev 1076 * 1077 * => swap data should be locked, we will unlock. 1078 */ 1079 int 1080 swap_off(struct proc *p, struct swapdev *sdp) 1081 { 1082 int error = 0; 1083 1084 /* disable the swap area being removed */ 1085 sdp->swd_flags &= ~SWF_ENABLE; 1086 simple_unlock(&uvm.swap_data_lock); 1087 1088 /* 1089 * the idea is to find all the pages that are paged out to this 1090 * device, and page them all in. in uvm, swap-backed pageable 1091 * memory can take two forms: aobjs and anons. call the 1092 * swapoff hook for each subsystem to bring in pages. 1093 */ 1094 1095 if (uao_swap_off(sdp->swd_drumoffset, 1096 sdp->swd_drumoffset + sdp->swd_drumsize) || 1097 amap_swap_off(sdp->swd_drumoffset, 1098 sdp->swd_drumoffset + sdp->swd_drumsize)) { 1099 1100 error = ENOMEM; 1101 } else if (sdp->swd_npginuse > sdp->swd_npgbad) { 1102 error = EBUSY; 1103 } 1104 1105 if (error) { 1106 simple_lock(&uvm.swap_data_lock); 1107 sdp->swd_flags |= SWF_ENABLE; 1108 simple_unlock(&uvm.swap_data_lock); 1109 return (error); 1110 } 1111 1112 /* 1113 * done with the vnode and saved creds. 1114 * drop our ref on the vnode before calling VOP_CLOSE() 1115 * so that spec_close() can tell if this is the last close. 1116 */ 1117 if (sdp->swd_vp->v_type == VREG) { 1118 crfree(sdp->swd_cred); 1119 } 1120 vrele(sdp->swd_vp); 1121 if (sdp->swd_vp != rootvp) { 1122 (void) VOP_CLOSE(sdp->swd_vp, FREAD|FWRITE, p->p_ucred, p); 1123 } 1124 1125 simple_lock(&uvm.swap_data_lock); 1126 uvmexp.swpages -= sdp->swd_npages; 1127 1128 if (swaplist_find(sdp->swd_vp, 1) == NULL) 1129 panic("swap_off: swapdev not in list"); 1130 swaplist_trim(); 1131 1132 /* 1133 * free all resources! 1134 */ 1135 extent_free(swapmap, sdp->swd_drumoffset, sdp->swd_drumsize, 1136 EX_WAITOK); 1137 extent_destroy(sdp->swd_ex); 1138 free(sdp, M_VMSWAP); 1139 simple_unlock(&uvm.swap_data_lock); 1140 return (0); 1141 } 1142 1143 /* 1144 * /dev/drum interface and i/o functions 1145 */ 1146 1147 /* 1148 * swstrategy: perform I/O on the drum 1149 * 1150 * => we must map the i/o request from the drum to the correct swapdev. 1151 */ 1152 void 1153 swstrategy(struct buf *bp) 1154 { 1155 struct swapdev *sdp; 1156 int s, pageno, bn; 1157 1158 /* 1159 * convert block number to swapdev. note that swapdev can't 1160 * be yanked out from under us because we are holding resources 1161 * in it (i.e. the blocks we are doing I/O on). 1162 */ 1163 pageno = dbtob((u_int64_t)bp->b_blkno) >> PAGE_SHIFT; 1164 simple_lock(&uvm.swap_data_lock); 1165 sdp = swapdrum_getsdp(pageno); 1166 simple_unlock(&uvm.swap_data_lock); 1167 if (sdp == NULL) { 1168 bp->b_error = EINVAL; 1169 bp->b_flags |= B_ERROR; 1170 s = splbio(); 1171 biodone(bp); 1172 splx(s); 1173 return; 1174 } 1175 1176 /* 1177 * convert drum page number to block number on this swapdev. 1178 */ 1179 1180 pageno -= sdp->swd_drumoffset; /* page # on swapdev */ 1181 bn = btodb((u_int64_t)pageno << PAGE_SHIFT); /* convert to diskblock */ 1182 1183 /* 1184 * for block devices we finish up here. 1185 * for regular files we have to do more work which we delegate 1186 * to sw_reg_strategy(). 1187 */ 1188 1189 switch (sdp->swd_vp->v_type) { 1190 default: 1191 panic("swstrategy: vnode type 0x%x", sdp->swd_vp->v_type); 1192 1193 case VBLK: 1194 1195 /* 1196 * must convert "bp" from an I/O on /dev/drum to an I/O 1197 * on the swapdev (sdp). 1198 */ 1199 s = splbio(); 1200 buf_replacevnode(bp, sdp->swd_vp); 1201 1202 bp->b_blkno = bn; 1203 splx(s); 1204 VOP_STRATEGY(bp); 1205 return; 1206 1207 case VREG: 1208 /* 1209 * delegate to sw_reg_strategy function. 1210 */ 1211 sw_reg_strategy(sdp, bp, bn); 1212 return; 1213 } 1214 /* NOTREACHED */ 1215 } 1216 1217 /* 1218 * sw_reg_strategy: handle swap i/o to regular files 1219 */ 1220 void 1221 sw_reg_strategy(struct swapdev *sdp, struct buf *bp, int bn) 1222 { 1223 struct vnode *vp; 1224 struct vndxfer *vnx; 1225 daddr64_t nbn; 1226 caddr_t addr; 1227 off_t byteoff; 1228 int s, off, nra, error, sz, resid; 1229 1230 /* 1231 * allocate a vndxfer head for this transfer and point it to 1232 * our buffer. 1233 */ 1234 getvndxfer(vnx); 1235 vnx->vx_flags = VX_BUSY; 1236 vnx->vx_error = 0; 1237 vnx->vx_pending = 0; 1238 vnx->vx_bp = bp; 1239 vnx->vx_sdp = sdp; 1240 1241 /* 1242 * setup for main loop where we read filesystem blocks into 1243 * our buffer. 1244 */ 1245 error = 0; 1246 bp->b_resid = bp->b_bcount; /* nothing transferred yet! */ 1247 addr = bp->b_data; /* current position in buffer */ 1248 byteoff = dbtob((u_int64_t)bn); 1249 1250 for (resid = bp->b_resid; resid; resid -= sz) { 1251 struct vndbuf *nbp; 1252 1253 /* 1254 * translate byteoffset into block number. return values: 1255 * vp = vnode of underlying device 1256 * nbn = new block number (on underlying vnode dev) 1257 * nra = num blocks we can read-ahead (excludes requested 1258 * block) 1259 */ 1260 nra = 0; 1261 error = VOP_BMAP(sdp->swd_vp, byteoff / sdp->swd_bsize, 1262 &vp, &nbn, &nra); 1263 1264 if (error == 0 && nbn == (daddr64_t)-1) { 1265 /* 1266 * this used to just set error, but that doesn't 1267 * do the right thing. Instead, it causes random 1268 * memory errors. The panic() should remain until 1269 * this condition doesn't destabilize the system. 1270 */ 1271 #if 1 1272 panic("sw_reg_strategy: swap to sparse file"); 1273 #else 1274 error = EIO; /* failure */ 1275 #endif 1276 } 1277 1278 /* 1279 * punt if there was an error or a hole in the file. 1280 * we must wait for any i/o ops we have already started 1281 * to finish before returning. 1282 * 1283 * XXX we could deal with holes here but it would be 1284 * a hassle (in the write case). 1285 */ 1286 if (error) { 1287 s = splbio(); 1288 vnx->vx_error = error; /* pass error up */ 1289 goto out; 1290 } 1291 1292 /* 1293 * compute the size ("sz") of this transfer (in bytes). 1294 */ 1295 off = byteoff % sdp->swd_bsize; 1296 sz = (1 + nra) * sdp->swd_bsize - off; 1297 if (sz > resid) 1298 sz = resid; 1299 1300 /* 1301 * now get a buf structure. note that the vb_buf is 1302 * at the front of the nbp structure so that you can 1303 * cast pointers between the two structure easily. 1304 */ 1305 getvndbuf(nbp); 1306 nbp->vb_buf.b_flags = bp->b_flags | B_CALL; 1307 nbp->vb_buf.b_bcount = sz; 1308 nbp->vb_buf.b_bufsize = sz; 1309 nbp->vb_buf.b_error = 0; 1310 nbp->vb_buf.b_data = addr; 1311 nbp->vb_buf.b_bq = NULL; 1312 nbp->vb_buf.b_blkno = nbn + btodb(off); 1313 nbp->vb_buf.b_proc = bp->b_proc; 1314 nbp->vb_buf.b_iodone = sw_reg_iodone; 1315 nbp->vb_buf.b_vp = NULLVP; 1316 nbp->vb_buf.b_vnbufs.le_next = NOLIST; 1317 LIST_INIT(&nbp->vb_buf.b_dep); 1318 1319 /* 1320 * set b_dirtyoff/end and b_validoff/end. this is 1321 * required by the NFS client code (otherwise it will 1322 * just discard our I/O request). 1323 */ 1324 if (bp->b_dirtyend == 0) { 1325 nbp->vb_buf.b_dirtyoff = 0; 1326 nbp->vb_buf.b_dirtyend = sz; 1327 } else { 1328 nbp->vb_buf.b_dirtyoff = 1329 max(0, bp->b_dirtyoff - (bp->b_bcount-resid)); 1330 nbp->vb_buf.b_dirtyend = 1331 min(sz, 1332 max(0, bp->b_dirtyend - (bp->b_bcount-resid))); 1333 } 1334 if (bp->b_validend == 0) { 1335 nbp->vb_buf.b_validoff = 0; 1336 nbp->vb_buf.b_validend = sz; 1337 } else { 1338 nbp->vb_buf.b_validoff = 1339 max(0, bp->b_validoff - (bp->b_bcount-resid)); 1340 nbp->vb_buf.b_validend = 1341 min(sz, 1342 max(0, bp->b_validend - (bp->b_bcount-resid))); 1343 } 1344 1345 nbp->vb_xfer = vnx; /* patch it back in to vnx */ 1346 1347 /* XXX: In case the underlying bufq is disksort: */ 1348 nbp->vb_buf.b_cylinder = nbp->vb_buf.b_blkno; 1349 1350 s = splbio(); 1351 if (vnx->vx_error != 0) { 1352 putvndbuf(nbp); 1353 goto out; 1354 } 1355 vnx->vx_pending++; 1356 1357 /* assoc new buffer with underlying vnode */ 1358 bgetvp(vp, &nbp->vb_buf); 1359 1360 /* start I/O if we are not over our limit */ 1361 bufq_queue(&sdp->swd_bufq, &nbp->vb_buf); 1362 sw_reg_start(sdp); 1363 splx(s); 1364 1365 /* 1366 * advance to the next I/O 1367 */ 1368 byteoff += sz; 1369 addr += sz; 1370 } 1371 1372 s = splbio(); 1373 1374 out: /* Arrive here at splbio */ 1375 vnx->vx_flags &= ~VX_BUSY; 1376 if (vnx->vx_pending == 0) { 1377 if (vnx->vx_error != 0) { 1378 bp->b_error = vnx->vx_error; 1379 bp->b_flags |= B_ERROR; 1380 } 1381 putvndxfer(vnx); 1382 biodone(bp); 1383 } 1384 splx(s); 1385 } 1386 1387 /* sw_reg_start: start an I/O request on the requested swapdev. */ 1388 void 1389 sw_reg_start(struct swapdev *sdp) 1390 { 1391 struct buf *bp; 1392 1393 /* XXX: recursion control */ 1394 if ((sdp->swd_flags & SWF_BUSY) != 0) 1395 return; 1396 1397 sdp->swd_flags |= SWF_BUSY; 1398 1399 while (sdp->swd_active < sdp->swd_maxactive) { 1400 bp = bufq_dequeue(&sdp->swd_bufq); 1401 if (bp == NULL) 1402 break; 1403 1404 sdp->swd_active++; 1405 1406 if ((bp->b_flags & B_READ) == 0) 1407 bp->b_vp->v_numoutput++; 1408 1409 VOP_STRATEGY(bp); 1410 } 1411 sdp->swd_flags &= ~SWF_BUSY; 1412 } 1413 1414 /* 1415 * sw_reg_iodone: one of our i/o's has completed and needs post-i/o cleanup 1416 * 1417 * => note that we can recover the vndbuf struct by casting the buf ptr 1418 * 1419 * XXX: 1420 * We only put this onto a workq here, because of the maxactive game since 1421 * it basically requires us to call back into VOP_STRATEGY() (where we must 1422 * be able to sleep) via sw_reg_start(). 1423 */ 1424 void 1425 sw_reg_iodone(struct buf *bp) 1426 { 1427 struct bufq_swapreg *bq; 1428 1429 bq = (struct bufq_swapreg *)&bp->b_bufq; 1430 1431 workq_queue_task(NULL, &bq->bqf_wqtask, 0, 1432 (workq_fn)sw_reg_iodone_internal, bp, NULL); 1433 } 1434 1435 void 1436 sw_reg_iodone_internal(void *arg0, void *unused) 1437 { 1438 struct vndbuf *vbp = (struct vndbuf *)arg0; 1439 struct vndxfer *vnx = vbp->vb_xfer; 1440 struct buf *pbp = vnx->vx_bp; /* parent buffer */ 1441 struct swapdev *sdp = vnx->vx_sdp; 1442 int resid, s; 1443 1444 s = splbio(); 1445 1446 resid = vbp->vb_buf.b_bcount - vbp->vb_buf.b_resid; 1447 pbp->b_resid -= resid; 1448 vnx->vx_pending--; 1449 1450 /* pass error upward */ 1451 if (vbp->vb_buf.b_error) 1452 vnx->vx_error = vbp->vb_buf.b_error; 1453 1454 /* 1455 * disassociate this buffer from the vnode (if any). 1456 */ 1457 if (vbp->vb_buf.b_vp != NULL) { 1458 brelvp(&vbp->vb_buf); 1459 } 1460 1461 /* 1462 * kill vbp structure 1463 */ 1464 putvndbuf(vbp); 1465 1466 /* 1467 * wrap up this transaction if it has run to completion or, in 1468 * case of an error, when all auxiliary buffers have returned. 1469 */ 1470 if (vnx->vx_error != 0) { 1471 /* pass error upward */ 1472 pbp->b_flags |= B_ERROR; 1473 pbp->b_error = vnx->vx_error; 1474 if ((vnx->vx_flags & VX_BUSY) == 0 && vnx->vx_pending == 0) { 1475 putvndxfer(vnx); 1476 biodone(pbp); 1477 } 1478 } else if (pbp->b_resid == 0) { 1479 KASSERT(vnx->vx_pending == 0); 1480 if ((vnx->vx_flags & VX_BUSY) == 0) { 1481 putvndxfer(vnx); 1482 biodone(pbp); 1483 } 1484 } 1485 1486 /* 1487 * done! start next swapdev I/O if one is pending 1488 */ 1489 sdp->swd_active--; 1490 sw_reg_start(sdp); 1491 splx(s); 1492 } 1493 1494 1495 /* 1496 * uvm_swap_alloc: allocate space on swap 1497 * 1498 * => allocation is done "round robin" down the priority list, as we 1499 * allocate in a priority we "rotate" the circle queue. 1500 * => space can be freed with uvm_swap_free 1501 * => we return the page slot number in /dev/drum (0 == invalid slot) 1502 * => we lock uvm.swap_data_lock 1503 * => XXXMRG: "LESSOK" INTERFACE NEEDED TO EXTENT SYSTEM 1504 */ 1505 int 1506 uvm_swap_alloc(int *nslots, boolean_t lessok) 1507 { 1508 struct swapdev *sdp; 1509 struct swappri *spp; 1510 u_long result; 1511 1512 /* 1513 * no swap devices configured yet? definite failure. 1514 */ 1515 if (uvmexp.nswapdev < 1) 1516 return 0; 1517 1518 /* 1519 * lock data lock, convert slots into blocks, and enter loop 1520 */ 1521 simple_lock(&uvm.swap_data_lock); 1522 1523 ReTry: /* XXXMRG */ 1524 for (spp = LIST_FIRST(&swap_priority); spp != NULL; 1525 spp = LIST_NEXT(spp, spi_swappri)) { 1526 for (sdp = CIRCLEQ_FIRST(&spp->spi_swapdev); 1527 sdp != (void *)&spp->spi_swapdev; 1528 sdp = CIRCLEQ_NEXT(sdp,swd_next)) { 1529 /* if it's not enabled, then we can't swap from it */ 1530 if ((sdp->swd_flags & SWF_ENABLE) == 0) 1531 continue; 1532 if (sdp->swd_npginuse + *nslots > sdp->swd_npages) 1533 continue; 1534 if (extent_alloc(sdp->swd_ex, *nslots, EX_NOALIGN, 0, 1535 EX_NOBOUNDARY, EX_MALLOCOK|EX_NOWAIT, 1536 &result) != 0) { 1537 continue; 1538 } 1539 1540 /* 1541 * successful allocation! now rotate the circleq. 1542 */ 1543 CIRCLEQ_REMOVE(&spp->spi_swapdev, sdp, swd_next); 1544 CIRCLEQ_INSERT_TAIL(&spp->spi_swapdev, sdp, swd_next); 1545 sdp->swd_npginuse += *nslots; 1546 uvmexp.swpginuse += *nslots; 1547 simple_unlock(&uvm.swap_data_lock); 1548 /* done! return drum slot number */ 1549 return(result + sdp->swd_drumoffset); 1550 } 1551 } 1552 1553 /* XXXMRG: BEGIN HACK */ 1554 if (*nslots > 1 && lessok) { 1555 *nslots = 1; 1556 goto ReTry; /* XXXMRG: ugh! extent should support this for us */ 1557 } 1558 /* XXXMRG: END HACK */ 1559 1560 simple_unlock(&uvm.swap_data_lock); 1561 return 0; /* failed */ 1562 } 1563 1564 /* 1565 * uvm_swap_markbad: keep track of swap ranges where we've had i/o errors 1566 * 1567 * => we lock uvm.swap_data_lock 1568 */ 1569 void 1570 uvm_swap_markbad(int startslot, int nslots) 1571 { 1572 struct swapdev *sdp; 1573 1574 simple_lock(&uvm.swap_data_lock); 1575 sdp = swapdrum_getsdp(startslot); 1576 if (sdp != NULL) { 1577 /* 1578 * we just keep track of how many pages have been marked bad 1579 * in this device, to make everything add up in swap_off(). 1580 * we assume here that the range of slots will all be within 1581 * one swap device. 1582 */ 1583 sdp->swd_npgbad += nslots; 1584 } 1585 simple_unlock(&uvm.swap_data_lock); 1586 } 1587 1588 /* 1589 * uvm_swap_free: free swap slots 1590 * 1591 * => this can be all or part of an allocation made by uvm_swap_alloc 1592 * => we lock uvm.swap_data_lock 1593 */ 1594 void 1595 uvm_swap_free(int startslot, int nslots) 1596 { 1597 struct swapdev *sdp; 1598 1599 /* 1600 * ignore attempts to free the "bad" slot. 1601 */ 1602 1603 if (startslot == SWSLOT_BAD) { 1604 return; 1605 } 1606 1607 /* 1608 * convert drum slot offset back to sdp, free the blocks 1609 * in the extent, and return. must hold pri lock to do 1610 * lookup and access the extent. 1611 */ 1612 1613 simple_lock(&uvm.swap_data_lock); 1614 sdp = swapdrum_getsdp(startslot); 1615 KASSERT(uvmexp.nswapdev >= 1); 1616 KASSERT(sdp != NULL); 1617 KASSERT(sdp->swd_npginuse >= nslots); 1618 if (extent_free(sdp->swd_ex, startslot - sdp->swd_drumoffset, nslots, 1619 EX_MALLOCOK|EX_NOWAIT) != 0) { 1620 printf("warning: resource shortage: %d pages of swap lost\n", 1621 nslots); 1622 } 1623 1624 sdp->swd_npginuse -= nslots; 1625 uvmexp.swpginuse -= nslots; 1626 #ifdef UVM_SWAP_ENCRYPT 1627 { 1628 int i; 1629 if (swap_encrypt_initialized) { 1630 /* Dereference keys */ 1631 for (i = 0; i < nslots; i++) 1632 if (uvm_swap_needdecrypt(sdp, startslot + i)) { 1633 struct swap_key *key; 1634 1635 key = SWD_KEY(sdp, startslot + i); 1636 if (key->refcount != 0) 1637 SWAP_KEY_PUT(sdp, key); 1638 } 1639 1640 /* Mark range as not decrypt */ 1641 uvm_swap_markdecrypt(sdp, startslot, nslots, 0); 1642 } 1643 } 1644 #endif /* UVM_SWAP_ENCRYPT */ 1645 simple_unlock(&uvm.swap_data_lock); 1646 } 1647 1648 /* 1649 * uvm_swap_put: put any number of pages into a contig place on swap 1650 * 1651 * => can be sync or async 1652 * => XXXMRG: consider making it an inline or macro 1653 */ 1654 int 1655 uvm_swap_put(int swslot, struct vm_page **ppsp, int npages, int flags) 1656 { 1657 int result; 1658 1659 result = uvm_swap_io(ppsp, swslot, npages, B_WRITE | 1660 ((flags & PGO_SYNCIO) ? 0 : B_ASYNC)); 1661 1662 return (result); 1663 } 1664 1665 /* 1666 * uvm_swap_get: get a single page from swap 1667 * 1668 * => usually a sync op (from fault) 1669 * => XXXMRG: consider making it an inline or macro 1670 */ 1671 int 1672 uvm_swap_get(struct vm_page *page, int swslot, int flags) 1673 { 1674 int result; 1675 1676 uvmexp.nswget++; 1677 KASSERT(flags & PGO_SYNCIO); 1678 if (swslot == SWSLOT_BAD) { 1679 return VM_PAGER_ERROR; 1680 } 1681 1682 /* 1683 * this page is (about to be) no longer only in swap. 1684 */ 1685 simple_lock(&uvm.swap_data_lock); 1686 uvmexp.swpgonly--; 1687 simple_unlock(&uvm.swap_data_lock); 1688 1689 result = uvm_swap_io(&page, swslot, 1, B_READ | 1690 ((flags & PGO_SYNCIO) ? 0 : B_ASYNC)); 1691 1692 if (result != VM_PAGER_OK && result != VM_PAGER_PEND) { 1693 /* 1694 * oops, the read failed so it really is still only in swap. 1695 */ 1696 simple_lock(&uvm.swap_data_lock); 1697 uvmexp.swpgonly++; 1698 simple_unlock(&uvm.swap_data_lock); 1699 } 1700 1701 return (result); 1702 } 1703 1704 /* 1705 * uvm_swap_io: do an i/o operation to swap 1706 */ 1707 1708 int 1709 uvm_swap_io(struct vm_page **pps, int startslot, int npages, int flags) 1710 { 1711 daddr64_t startblk; 1712 struct buf *bp; 1713 vaddr_t kva; 1714 int result, s, mapinflags, pflag, bounce = 0, i; 1715 boolean_t write, async; 1716 vaddr_t bouncekva; 1717 struct vm_page *tpps[MAXBSIZE >> PAGE_SHIFT]; 1718 #ifdef UVM_SWAP_ENCRYPT 1719 struct swapdev *sdp; 1720 int encrypt = 0; 1721 #endif 1722 1723 write = (flags & B_READ) == 0; 1724 async = (flags & B_ASYNC) != 0; 1725 1726 /* 1727 * convert starting drum slot to block number 1728 */ 1729 startblk = btodb((u_int64_t)startslot << PAGE_SHIFT); 1730 1731 /* 1732 * first, map the pages into the kernel (XXX: currently required 1733 * by buffer system). 1734 */ 1735 mapinflags = !write ? UVMPAGER_MAPIN_READ : UVMPAGER_MAPIN_WRITE; 1736 if (!async) 1737 mapinflags |= UVMPAGER_MAPIN_WAITOK; 1738 kva = uvm_pagermapin(pps, npages, mapinflags); 1739 if (kva == 0) 1740 return (VM_PAGER_AGAIN); 1741 1742 #ifdef UVM_SWAP_ENCRYPT 1743 if (write) { 1744 /* 1745 * Check if we need to do swap encryption on old pages. 1746 * Later we need a different scheme, that swap encrypts 1747 * all pages of a process that had at least one page swap 1748 * encrypted. Then we might not need to copy all pages 1749 * in the cluster, and avoid the memory overheard in 1750 * swapping. 1751 */ 1752 if (uvm_doswapencrypt) 1753 encrypt = 1; 1754 } 1755 1756 if (swap_encrypt_initialized || encrypt) { 1757 /* 1758 * we need to know the swap device that we are swapping to/from 1759 * to see if the pages need to be marked for decryption or 1760 * actually need to be decrypted. 1761 * XXX - does this information stay the same over the whole 1762 * execution of this function? 1763 */ 1764 simple_lock(&uvm.swap_data_lock); 1765 sdp = swapdrum_getsdp(startslot); 1766 simple_unlock(&uvm.swap_data_lock); 1767 } 1768 1769 /* 1770 * Check that we are dma capable for read (write always bounces 1771 * through the swapencrypt anyway... 1772 */ 1773 if (write && encrypt) { 1774 bounce = 1; /* bounce through swapencrypt always */ 1775 } else { 1776 #else 1777 { 1778 #endif 1779 1780 for (i = 0; i < npages; i++) { 1781 if (VM_PAGE_TO_PHYS(pps[i]) < dma_constraint.ucr_low || 1782 VM_PAGE_TO_PHYS(pps[i]) > dma_constraint.ucr_high) { 1783 bounce = 1; 1784 break; 1785 } 1786 } 1787 } 1788 1789 if (bounce) { 1790 int swmapflags; 1791 1792 /* We always need write access. */ 1793 swmapflags = UVMPAGER_MAPIN_READ; 1794 if (!async) 1795 swmapflags |= UVMPAGER_MAPIN_WAITOK; 1796 1797 if (!uvm_swap_allocpages(tpps, npages)) { 1798 uvm_pagermapout(kva, npages); 1799 return (VM_PAGER_AGAIN); 1800 } 1801 1802 bouncekva = uvm_pagermapin(tpps, npages, swmapflags); 1803 if (bouncekva == 0) { 1804 uvm_pagermapout(kva, npages); 1805 uvm_swap_freepages(tpps, npages); 1806 return (VM_PAGER_AGAIN); 1807 } 1808 } 1809 1810 /* 1811 * encrypt to swap 1812 */ 1813 if (write && bounce) { 1814 int i, opages; 1815 caddr_t src, dst; 1816 u_int64_t block; 1817 1818 src = (caddr_t) kva; 1819 dst = (caddr_t) bouncekva; 1820 block = startblk; 1821 for (i = 0; i < npages; i++) { 1822 #ifdef UVM_SWAP_ENCRYPT 1823 struct swap_key *key; 1824 1825 if (encrypt) { 1826 key = SWD_KEY(sdp, startslot + i); 1827 SWAP_KEY_GET(sdp, key); /* add reference */ 1828 1829 swap_encrypt(key, src, dst, block, 1830 1 << PAGE_SHIFT); 1831 block += btodb(1 << PAGE_SHIFT); 1832 } else { 1833 #else 1834 { 1835 #endif /* UVM_SWAP_ENCRYPT */ 1836 memcpy(dst, src, PAGE_SIZE); 1837 } 1838 /* this just tells async callbacks to free */ 1839 atomic_setbits_int(&tpps[i]->pg_flags, PQ_ENCRYPT); 1840 src += 1 << PAGE_SHIFT; 1841 dst += 1 << PAGE_SHIFT; 1842 } 1843 1844 uvm_pagermapout(kva, npages); 1845 1846 /* dispose of pages we dont use anymore */ 1847 opages = npages; 1848 uvm_pager_dropcluster(NULL, NULL, pps, &opages, 1849 PGO_PDFREECLUST); 1850 1851 kva = bouncekva; 1852 } 1853 1854 /* 1855 * now allocate a buf for the i/o. 1856 * [make sure we don't put the pagedaemon to sleep...] 1857 */ 1858 s = splbio(); 1859 pflag = (async || curproc == uvm.pagedaemon_proc) ? PR_NOWAIT : 1860 PR_WAITOK; 1861 bp = pool_get(&bufpool, pflag); 1862 splx(s); 1863 1864 /* 1865 * if we failed to get a swapbuf, return "try again" 1866 */ 1867 if (bp == NULL) { 1868 if (write && bounce) { 1869 #ifdef UVM_SWAP_ENCRYPT 1870 int i; 1871 1872 /* swap encrypt needs cleanup */ 1873 if (encrypt) 1874 for (i = 0; i < npages; i++) 1875 SWAP_KEY_PUT(sdp, SWD_KEY(sdp, 1876 startslot + i)); 1877 #endif 1878 1879 uvm_pagermapout(kva, npages); 1880 uvm_swap_freepages(tpps, npages); 1881 } 1882 return (VM_PAGER_AGAIN); 1883 } 1884 1885 /* 1886 * prevent ASYNC reads. 1887 * uvm_swap_io is only called from uvm_swap_get, uvm_swap_get 1888 * assumes that all gets are SYNCIO. Just make sure here. 1889 * XXXARTUBC - might not be true anymore. 1890 */ 1891 if (!write) { 1892 flags &= ~B_ASYNC; 1893 async = 0; 1894 } 1895 1896 /* 1897 * fill in the bp. we currently route our i/o through 1898 * /dev/drum's vnode [swapdev_vp]. 1899 */ 1900 bp->b_flags = B_BUSY | B_NOCACHE | B_RAW | (flags & (B_READ|B_ASYNC)); 1901 bp->b_proc = &proc0; /* XXX */ 1902 bp->b_vnbufs.le_next = NOLIST; 1903 if (bounce) 1904 bp->b_data = (caddr_t)bouncekva; 1905 else 1906 bp->b_data = (caddr_t)kva; 1907 bp->b_bq = NULL; 1908 bp->b_blkno = startblk; 1909 LIST_INIT(&bp->b_dep); 1910 s = splbio(); 1911 bp->b_vp = NULL; 1912 buf_replacevnode(bp, swapdev_vp); 1913 splx(s); 1914 bp->b_bufsize = bp->b_bcount = npages << PAGE_SHIFT; 1915 1916 /* 1917 * for pageouts we must set "dirtyoff" [NFS client code needs it]. 1918 * and we bump v_numoutput (counter of number of active outputs). 1919 */ 1920 if (write) { 1921 bp->b_dirtyoff = 0; 1922 bp->b_dirtyend = npages << PAGE_SHIFT; 1923 #ifdef UVM_SWAP_ENCRYPT 1924 /* mark the pages in the drum for decryption */ 1925 if (swap_encrypt_initialized) 1926 uvm_swap_markdecrypt(sdp, startslot, npages, encrypt); 1927 #endif 1928 s = splbio(); 1929 swapdev_vp->v_numoutput++; 1930 splx(s); 1931 } 1932 1933 /* 1934 * for async ops we must set up the iodone handler. 1935 */ 1936 if (async) { 1937 bp->b_flags |= B_CALL | (curproc == uvm.pagedaemon_proc ? 1938 B_PDAEMON : 0); 1939 bp->b_iodone = uvm_aio_biodone; 1940 } 1941 1942 /* 1943 * now we start the I/O, and if async, return. 1944 */ 1945 VOP_STRATEGY(bp); 1946 if (async) 1947 return (VM_PAGER_PEND); 1948 1949 /* 1950 * must be sync i/o. wait for it to finish 1951 */ 1952 (void) biowait(bp); 1953 result = (bp->b_flags & B_ERROR) ? VM_PAGER_ERROR : VM_PAGER_OK; 1954 1955 /* 1956 * decrypt swap 1957 */ 1958 if (!write && !(bp->b_flags & B_ERROR)) { 1959 int i; 1960 caddr_t data = (caddr_t)kva; 1961 caddr_t dst = (caddr_t)kva; 1962 u_int64_t block = startblk; 1963 1964 if (bounce) 1965 data = (caddr_t)bouncekva; 1966 1967 for (i = 0; i < npages; i++) { 1968 #ifdef UVM_SWAP_ENCRYPT 1969 struct swap_key *key; 1970 1971 /* Check if we need to decrypt */ 1972 if (swap_encrypt_initialized && 1973 uvm_swap_needdecrypt(sdp, startslot + i)) { 1974 key = SWD_KEY(sdp, startslot + i); 1975 if (key->refcount == 0) { 1976 result = VM_PAGER_ERROR; 1977 break; 1978 } 1979 swap_decrypt(key, data, dst, block, 1980 1 << PAGE_SHIFT); 1981 } else if (bounce) { 1982 #else 1983 if (bounce) { 1984 #endif 1985 memcpy(dst, data, 1 << PAGE_SHIFT); 1986 } 1987 data += 1 << PAGE_SHIFT; 1988 dst += 1 << PAGE_SHIFT; 1989 block += btodb(1 << PAGE_SHIFT); 1990 } 1991 if (bounce) 1992 uvm_pagermapout(bouncekva, npages); 1993 } 1994 /* 1995 * kill the pager mapping 1996 */ 1997 uvm_pagermapout(kva, npages); 1998 1999 /* 2000 * Not anymore needed, free after encryption/bouncing 2001 */ 2002 if (!write && bounce) 2003 uvm_swap_freepages(tpps, npages); 2004 2005 /* 2006 * now dispose of the buf 2007 */ 2008 s = splbio(); 2009 if (bp->b_vp) 2010 brelvp(bp); 2011 2012 if (write && bp->b_vp) 2013 vwakeup(bp->b_vp); 2014 pool_put(&bufpool, bp); 2015 splx(s); 2016 2017 /* 2018 * finally return. 2019 */ 2020 return (result); 2021 } 2022 2023 void 2024 swapmount(void) 2025 { 2026 struct swapdev *sdp; 2027 struct swappri *spp; 2028 struct vnode *vp; 2029 dev_t swap_dev = swdevt[0].sw_dev; 2030 char *nam; 2031 2032 /* 2033 * No locking here since we happen to know that we will just be called 2034 * once before any other process has forked. 2035 */ 2036 2037 if (swap_dev == NODEV) { 2038 printf("swapmount: no device\n"); 2039 return; 2040 } 2041 2042 if (bdevvp(swap_dev, &vp)) { 2043 printf("swapmount: no device 2\n"); 2044 return; 2045 } 2046 2047 sdp = malloc(sizeof(*sdp), M_VMSWAP, M_WAITOK|M_ZERO); 2048 spp = malloc(sizeof(*spp), M_VMSWAP, M_WAITOK); 2049 2050 sdp->swd_flags = SWF_FAKE; 2051 sdp->swd_dev = swap_dev; 2052 sdp->swd_vp = vp; 2053 2054 /* Construct a potential path to swap */ 2055 sdp->swd_pathlen = MNAMELEN + 1; 2056 sdp->swd_path = malloc(sdp->swd_pathlen, M_VMSWAP, M_WAITOK); 2057 #if defined(NFSCLIENT) 2058 if (swap_dev == NETDEV) 2059 snprintf(sdp->swd_path, sdp->swd_pathlen, "/swap"); 2060 else 2061 #endif 2062 if ((nam = findblkname(major(swap_dev)))) 2063 snprintf(sdp->swd_path, sdp->swd_pathlen, "/dev/%s%d%c", nam, 2064 DISKUNIT(swap_dev), 'a' + DISKPART(swap_dev)); 2065 else 2066 snprintf(sdp->swd_path, sdp->swd_pathlen, "blkdev0x%x", 2067 swap_dev); 2068 sdp->swd_pathlen = strlen(sdp->swd_path) + 1; 2069 2070 swaplist_insert(sdp, spp, 0); 2071 2072 if (swap_on(curproc, sdp)) { 2073 swaplist_find(vp, 1); 2074 swaplist_trim(); 2075 vput(sdp->swd_vp); 2076 free(sdp->swd_path, M_VMSWAP); 2077 free(sdp, M_VMSWAP); 2078 return; 2079 } 2080 2081 VOP_UNLOCK(vp, 0, curproc); 2082 } 2083