/*	$NetBSD: vnd.c,v 1.278 2021/01/04 16:17:26 mlelstv Exp $	*/

/*-
 * Copyright (c) 1996, 1997, 1998, 2008, 2020 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vn.c 1.13 94/04/02$
 *
 *	@(#)vn.c	8.9 (Berkeley) 5/14/95
 */

/*
 * Vnode disk driver.
 *
 * Block/character interface to a vnode.  Allows one to treat a file
 * as a disk (e.g. build a filesystem in it, mount it, etc.).
 *
 * NOTE 1: If the vnode supports the VOP_BMAP and VOP_STRATEGY operations,
 * this uses them to avoid distorting the local buffer cache.  If those
 * block-level operations are not available, this falls back to the regular
 * read and write calls.  Using these may distort the cache in some cases,
 * but it is better to have the driver working than to prevent it from
 * working on file systems where the block-level operations are not
 * implemented for whatever reason.
 *
 * NOTE 2: There is a security issue involved with this driver.
 * Once mounted all access to the contents of the "mapped" file via
 * the special file is controlled by the permissions on the special
 * file, the protection of the mapped file is ignored (effectively,
 * by using root credentials in all transactions).
 *
 * NOTE 3: Doesn't interact with leases, should it?
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vnd.c,v 1.278 2021/01/04 16:17:26 mlelstv Exp $");

#if defined(_KERNEL_OPT)
#include "opt_vnd.h"
#include "opt_compat_netbsd.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/proc.h>
#include <sys/kthread.h>
#include <sys/errno.h>
#include <sys/buf.h>
#include <sys/bufq.h>
#include <sys/malloc.h>
#include <sys/ioctl.h>
#include <sys/disklabel.h>
#include <sys/device.h>
#include <sys/disk.h>
#include <sys/stat.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/fstrans.h>
#include <sys/file.h>
#include <sys/uio.h>
#include <sys/conf.h>
#include <sys/kauth.h>
#include <sys/module.h>
#include <sys/compat_stub.h>
#include <sys/atomic.h>

#include <net/zlib.h>

#include <miscfs/genfs/genfs.h>
#include <miscfs/specfs/specdev.h>

#include <dev/dkvar.h>
#include <dev/vndvar.h>

#include "ioconf.h"

#if defined(VNDDEBUG) && !defined(DEBUG)
#define DEBUG
#endif

#ifdef DEBUG
int dovndcluster = 1;
#define VDB_FOLLOW	0x01
#define VDB_INIT	0x02
#define VDB_IO		0x04
#define VDB_LABEL	0x08
int vnddebug = 0;
#endif

#define vndunit(x)	DISKUNIT(x)

struct vndxfer {
	struct buf vx_buf;
	struct vnd_softc *vx_vnd;
};
#define	VND_BUFTOXFER(bp)	((struct vndxfer *)(void *)bp)

#define VND_GETXFER(vnd)	pool_get(&(vnd)->sc_vxpool, PR_WAITOK)
#define VND_PUTXFER(vnd, vx)	pool_put(&(vnd)->sc_vxpool, (vx))

#define VNDLABELDEV(dev) \
    (MAKEDISKDEV(major((dev)), vndunit((dev)), RAW_PART))

#define	VND_MAXPENDING(vnd)	((vnd)->sc_maxactive * 4)
#define	VND_MAXPAGES(vnd)	(1024 * 1024 / PAGE_SIZE)


static void	vndclear(struct vnd_softc *, int);
static int	vnddoclear(struct vnd_softc *, int, int, bool);
static int	vndsetcred(struct vnd_softc *, kauth_cred_t);
static void	vndthrottle(struct vnd_softc *, struct vnode *);
static void	vndiodone(struct buf *);
#if 0
static void	vndshutdown(void);
#endif

static void	vndgetdefaultlabel(struct vnd_softc *, struct disklabel *);
static void	vndgetdisklabel(dev_t, struct vnd_softc *);

static int	vndlock(struct vnd_softc *);
static void	vndunlock(struct vnd_softc *);
#ifdef VND_COMPRESSION
static void	compstrategy(struct buf *, off_t);
static void	*vnd_alloc(void *, u_int, u_int);
static void	vnd_free(void *, void *);
#endif /* VND_COMPRESSION */

static void	vndthread(void *);
static bool	vnode_has_op(const struct vnode *, int);
static void	handle_with_rdwr(struct vnd_softc *, const struct buf *,
		    struct buf *);
static void	handle_with_strategy(struct vnd_softc *, const struct buf *,
		    struct buf *);
static void	vnd_set_geometry(struct vnd_softc *);

static dev_type_open(vndopen);
static dev_type_close(vndclose);
static dev_type_read(vndread);
static dev_type_write(vndwrite);
static dev_type_ioctl(vndioctl);
static dev_type_strategy(vndstrategy);
static dev_type_dump(vnddump);
static dev_type_size(vndsize);

const struct bdevsw vnd_bdevsw = {
	.d_open = vndopen,
	.d_close = vndclose,
	.d_strategy = vndstrategy,
	.d_ioctl = vndioctl,
	.d_dump = vnddump,
	.d_psize = vndsize,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};

const struct cdevsw vnd_cdevsw = {
	.d_open = vndopen,
	.d_close = vndclose,
	.d_read = vndread,
	.d_write = vndwrite,
	.d_ioctl = vndioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};

static int	vnd_match(device_t, cfdata_t, void *);
static void	vnd_attach(device_t, device_t, void *);
static int	vnd_detach(device_t, int);

CFATTACH_DECL3_NEW(vnd, sizeof(struct vnd_softc),
    vnd_match, vnd_attach, vnd_detach, NULL, NULL, NULL, DVF_DETACH_SHUTDOWN);

static struct vnd_softc	*vnd_spawn(int);
static int	vnd_destroy(device_t);

static const struct dkdriver vnddkdriver = {
	.d_strategy = vndstrategy,
	.d_minphys = minphys
};

void
vndattach(int num)
{
	int error;

	error = config_cfattach_attach(vnd_cd.cd_name, &vnd_ca);
	if (error)
		aprint_error("%s: unable to register cfattach, error = %d\n",
		    vnd_cd.cd_name, error);
}

static int
vnd_match(device_t self, cfdata_t cfdata, void *aux)
{

	return 1;
}

static void
vnd_attach(device_t parent, device_t self, void *aux)
{
	struct vnd_softc *sc = device_private(self);

	sc->sc_dev = self;
	sc->sc_comp_offsets = NULL;
	sc->sc_comp_buff = NULL;
	sc->sc_comp_decombuf = NULL;
	bufq_alloc(&sc->sc_tab, "disksort", BUFQ_SORT_RAWBLOCK);
	disk_init(&sc->sc_dkdev, device_xname(self), &vnddkdriver);
	if (!pmf_device_register(self, NULL, NULL))
		aprint_error_dev(self, "couldn't establish power handler\n");
}

static int
vnd_detach(device_t self, int flags)
{
	int error;
	struct vnd_softc *sc = device_private(self);

	if (sc->sc_flags & VNF_INITED) {
		error = vnddoclear(sc, 0, -1, (flags & DETACH_FORCE) != 0);
		if (error != 0)
			return error;
	}

	pmf_device_deregister(self);
	bufq_free(sc->sc_tab);
	disk_destroy(&sc->sc_dkdev);

	return 0;
}

static struct vnd_softc *
vnd_spawn(int unit)
{
	cfdata_t cf;

	cf = malloc(sizeof(*cf), M_DEVBUF, M_WAITOK);
	cf->cf_name = vnd_cd.cd_name;
	cf->cf_atname = vnd_cd.cd_name;
	cf->cf_unit = unit;
	cf->cf_fstate = FSTATE_STAR;

	return device_private(config_attach_pseudo(cf));
}
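
/*
 * Note: vnd_spawn() above is invoked from vndopen() when a unit that
 * has no attached instance yet is first opened; vnd_destroy() below
 * undoes this from vndclose() once an unconfigured unit has been
 * fully closed.
 */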
static int
vnd_destroy(device_t dev)
{
	int error;
	cfdata_t cf;

	cf = device_cfdata(dev);
	error = config_detach(dev, DETACH_QUIET);
	if (error)
		return error;
	free(cf, M_DEVBUF);
	return 0;
}

static int
vndopen(dev_t dev, int flags, int mode, struct lwp *l)
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;
	int error = 0, part, pmask;
	struct disklabel *lp;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndopen(0x%"PRIx64", 0x%x, 0x%x, %p)\n", dev, flags, mode, l);
#endif
	sc = device_lookup_private(&vnd_cd, unit);
	if (sc == NULL) {
		sc = vnd_spawn(unit);
		if (sc == NULL)
			return ENOMEM;

		/* compatibility, keep disklabel after close */
		sc->sc_flags = VNF_KLABEL;
	}

	if ((error = vndlock(sc)) != 0)
		return error;

	mutex_enter(&sc->sc_dkdev.dk_openlock);

	if ((sc->sc_flags & VNF_CLEARING) != 0) {
		error = ENXIO;
		goto done;
	}

	lp = sc->sc_dkdev.dk_label;

	part = DISKPART(dev);
	pmask = (1 << part);

	if (sc->sc_dkdev.dk_nwedges != 0 && part != RAW_PART) {
		error = EBUSY;
		goto done;
	}

	if (sc->sc_flags & VNF_INITED) {
		if ((sc->sc_dkdev.dk_openmask & ~(1<<RAW_PART)) != 0) {
			/*
			 * If any non-raw partition is open, but the disk
			 * has been invalidated, disallow further opens.
			 */
			if ((sc->sc_flags & VNF_VLABEL) == 0) {
				error = EIO;
				goto done;
			}
		} else {
			/*
			 * Load the partition info if not already loaded.
			 */
			if ((sc->sc_flags & VNF_VLABEL) == 0) {
				sc->sc_flags |= VNF_VLABEL;
				vndgetdisklabel(dev, sc);
			}
		}
	}

	/* Check that the partition exists. */
	if (part != RAW_PART) {
		if (((sc->sc_flags & VNF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
		     (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			goto done;
		}
	}

	/* Prevent our unit from being unconfigured while open. */
	switch (mode) {
	case S_IFCHR:
		sc->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		sc->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}
	sc->sc_dkdev.dk_openmask =
	    sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask;

done:
	mutex_exit(&sc->sc_dkdev.dk_openlock);
	vndunlock(sc);
	return error;
}

static int
vndclose(dev_t dev, int flags, int mode, struct lwp *l)
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;
	int error = 0, part;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndclose(0x%"PRIx64", 0x%x, 0x%x, %p)\n", dev, flags, mode, l);
#endif
	sc = device_lookup_private(&vnd_cd, unit);
	if (sc == NULL)
		return ENXIO;

	if ((error = vndlock(sc)) != 0)
		return error;

	mutex_enter(&sc->sc_dkdev.dk_openlock);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (mode) {
	case S_IFCHR:
		sc->sc_dkdev.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		sc->sc_dkdev.dk_bopenmask &= ~(1 << part);
		break;
	}
	sc->sc_dkdev.dk_openmask =
	    sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask;
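
	/*
	 * On the last close the in-core label is invalidated below,
	 * unless DIOCKLABEL asked us to keep it (VNF_KLABEL).
	 */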
	/* are we last opener ? */
	if (sc->sc_dkdev.dk_openmask == 0) {
		if ((sc->sc_flags & VNF_KLABEL) == 0)
			sc->sc_flags &= ~VNF_VLABEL;
	}

	mutex_exit(&sc->sc_dkdev.dk_openlock);

	vndunlock(sc);

	if ((sc->sc_flags & VNF_INITED) == 0) {
		if ((error = vnd_destroy(sc->sc_dev)) != 0) {
			aprint_error_dev(sc->sc_dev,
			    "unable to detach instance\n");
			return error;
		}
	}

	return 0;
}

/*
 * Queue the request, and wakeup the kernel thread to handle it.
 */
static void
vndstrategy(struct buf *bp)
{
	int unit = vndunit(bp->b_dev);
	struct vnd_softc *vnd =
	    device_lookup_private(&vnd_cd, unit);
	struct disklabel *lp;
	daddr_t blkno;
	int s = splbio();

	if (vnd == NULL) {
		bp->b_error = ENXIO;
		goto done;
	}
	lp = vnd->sc_dkdev.dk_label;

	if ((vnd->sc_flags & VNF_INITED) == 0) {
		bp->b_error = ENXIO;
		goto done;
	}

	/*
	 * The transfer must be a whole number of blocks.
	 */
	if ((bp->b_bcount % lp->d_secsize) != 0) {
		bp->b_error = EINVAL;
		goto done;
	}

	/*
	 * check if we're read-only.
	 */
	if ((vnd->sc_flags & VNF_READONLY) && !(bp->b_flags & B_READ)) {
		bp->b_error = EACCES;
		goto done;
	}

	/* If it's a nil transfer, wake up the top half now. */
	if (bp->b_bcount == 0) {
		goto done;
	}

	/*
	 * Do bounds checking and adjust transfer.  If there's an error,
	 * the bounds check will flag that for us.
	 */
	if (DISKPART(bp->b_dev) == RAW_PART) {
		if (bounds_check_with_mediasize(bp, DEV_BSIZE,
		    vnd->sc_size) <= 0)
			goto done;
	} else {
		if (bounds_check_with_label(&vnd->sc_dkdev,
		    bp, vnd->sc_flags & (VNF_WLABEL|VNF_LABELLING)) <= 0)
			goto done;
	}

	/*
	 * Put the block number in terms of the logical blocksize
	 * of the "device".
	 */

	blkno = bp->b_blkno / (lp->d_secsize / DEV_BSIZE);

	/*
	 * Translate the partition-relative block number to an absolute.
	 */
	if (DISKPART(bp->b_dev) != RAW_PART) {
		struct partition *pp;

		pp = &vnd->sc_dkdev.dk_label->d_partitions[
		    DISKPART(bp->b_dev)];
		blkno += pp->p_offset;
	}
	bp->b_rawblkno = blkno;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndstrategy(%p): unit %d\n", bp, unit);
#endif
	if ((vnd->sc_flags & VNF_USE_VN_RDWR)) {
		KASSERT(vnd->sc_pending >= 0 &&
		    vnd->sc_pending <= VND_MAXPENDING(vnd));
		while (vnd->sc_pending == VND_MAXPENDING(vnd))
			tsleep(&vnd->sc_pending, PRIBIO, "vndpc", 0);
		vnd->sc_pending++;
	}
	bufq_put(vnd->sc_tab, bp);
	wakeup(&vnd->sc_tab);
	splx(s);
	return;

done:
	bp->b_resid = bp->b_bcount;
	biodone(bp);
	splx(s);
}

static bool
vnode_has_strategy(struct vnd_softc *vnd)
{
	return vnode_has_op(vnd->sc_vp, VOFFSET(vop_bmap)) &&
	    vnode_has_op(vnd->sc_vp, VOFFSET(vop_strategy));
}

/* Verify that I/O requests cannot be smaller than the
 * smallest I/O size supported by the backend.
 */
static bool
vnode_has_large_blocks(struct vnd_softc *vnd)
{
	u_int32_t vnd_secsize, iosize;

	iosize = vnd->sc_iosize;
	vnd_secsize = vnd->sc_geom.vng_secsize;

	return vnd_secsize % iosize != 0;
}

/* XXX this function needs a reliable check to detect
 * sparse files.  Otherwise, bmap/strategy may be used
 * and fail on non-allocated blocks.  VOP_READ/VOP_WRITE
 * works on sparse files.
 */
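/*
 * In the meantime the VNDIOCSET path approximates such a check: if
 * VOP_GETATTR reports va_bytes < va_size, the file is definitely
 * sparse and VNF_USE_VN_RDWR is forced (see vndioctl() below).
 */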
#if notyet
static bool
vnode_strategy_probe(struct vnd_softc *vnd)
{
	int error;
	daddr_t nbn;

	if (!vnode_has_strategy(vnd))
		return false;

	if (vnode_has_large_blocks(vnd))
		return false;

	/* Convert the first logical block number to its
	 * physical block number.
	 */
	error = 0;
	vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY);
	error = VOP_BMAP(vnd->sc_vp, 0, NULL, &nbn, NULL);
	VOP_UNLOCK(vnd->sc_vp);

	/* Test if that worked. */
	if (error == 0 && (long)nbn == -1)
		return false;

	return true;
}
#endif

static void
vndthread(void *arg)
{
	struct vnd_softc *vnd = arg;
	int s;

	/* Determine whether we can *use* VOP_BMAP and VOP_STRATEGY to
	 * directly access the backing vnode.  If we can, use these two
	 * operations to avoid messing with the local buffer cache.
	 * Otherwise fall back to regular VOP_READ/VOP_WRITE operations
	 * which are guaranteed to work with any file system. */
	if ((vnd->sc_flags & VNF_USE_VN_RDWR) == 0 &&
	    ! vnode_has_strategy(vnd))
		vnd->sc_flags |= VNF_USE_VN_RDWR;

	/* VOP_STRATEGY can only be used if the backing vnode allows
	 * access to blocks as small as defined by the vnd geometry.
	 */
	if ((vnd->sc_flags & VNF_USE_VN_RDWR) == 0 &&
	    vnode_has_large_blocks(vnd))
		vnd->sc_flags |= VNF_USE_VN_RDWR;

#ifdef DEBUG
	if (vnddebug & VDB_INIT)
		printf("vndthread: vp %p, %s\n", vnd->sc_vp,
		    (vnd->sc_flags & VNF_USE_VN_RDWR) == 0 ?
		    "using bmap/strategy operations" :
		    "using read/write operations");
#endif

	s = splbio();
	vnd->sc_flags |= VNF_KTHREAD;
	wakeup(&vnd->sc_kthread);

	/*
	 * Dequeue requests and serve them depending on the available
	 * vnode operations.
	 */
	while ((vnd->sc_flags & VNF_VUNCONF) == 0) {
		struct vndxfer *vnx;
		struct buf *obp;
		struct buf *bp;

		obp = bufq_get(vnd->sc_tab);
		if (obp == NULL) {
			tsleep(&vnd->sc_tab, PRIBIO, "vndbp", 0);
			continue;
		}
		if ((vnd->sc_flags & VNF_USE_VN_RDWR)) {
			KASSERT(vnd->sc_pending > 0 &&
			    vnd->sc_pending <= VND_MAXPENDING(vnd));
			if (vnd->sc_pending-- == VND_MAXPENDING(vnd))
				wakeup(&vnd->sc_pending);
		}
		splx(s);
#ifdef DEBUG
		if (vnddebug & VDB_FOLLOW)
			printf("vndthread(%p)\n", obp);
#endif

		if (vnd->sc_vp->v_mount == NULL) {
			obp->b_error = ENXIO;
			goto done;
		}
#ifdef VND_COMPRESSION
		/* handle a compressed read */
		if ((obp->b_flags & B_READ) != 0 && (vnd->sc_flags & VNF_COMP)) {
			off_t bn;

			/* Convert to a byte offset within the file. */
			bn = obp->b_rawblkno *
			    vnd->sc_dkdev.dk_label->d_secsize;

			compstrategy(obp, bn);
			goto done;
		}
#endif /* VND_COMPRESSION */

		/*
		 * Allocate a header for this transfer and link it to the
		 * buffer.
		 */
		s = splbio();
		vnx = VND_GETXFER(vnd);
		splx(s);
		vnx->vx_vnd = vnd;

		s = splbio();
		while (vnd->sc_active >= vnd->sc_maxactive) {
			tsleep(&vnd->sc_tab, PRIBIO, "vndac", 0);
		}
		vnd->sc_active++;
		splx(s);

		/* Instrumentation. */
		disk_busy(&vnd->sc_dkdev);

		bp = &vnx->vx_buf;
		buf_init(bp);
		bp->b_flags = (obp->b_flags & (B_READ | B_PHYS | B_RAW));
		bp->b_oflags = obp->b_oflags;
		bp->b_cflags = obp->b_cflags;
		bp->b_iodone = vndiodone;
		bp->b_private = obp;
		bp->b_vp = vnd->sc_vp;
		bp->b_objlock = bp->b_vp->v_interlock;
		bp->b_data = obp->b_data;
		bp->b_bcount = obp->b_bcount;
		BIO_COPYPRIO(bp, obp);

		/* Make sure the request succeeds while suspending this fs. */
		fstrans_start_lazy(vnd->sc_vp->v_mount);

		/* Handle the request using the appropriate operations. */
		if ((vnd->sc_flags & VNF_USE_VN_RDWR) == 0)
			handle_with_strategy(vnd, obp, bp);
		else
			handle_with_rdwr(vnd, obp, bp);

		fstrans_done(vnd->sc_vp->v_mount);

		s = splbio();
		continue;

done:
		biodone(obp);
		s = splbio();
	}

	vnd->sc_flags &= (~VNF_KTHREAD | VNF_VUNCONF);
	wakeup(&vnd->sc_kthread);
	splx(s);
	kthread_exit(0);
}

/*
 * Checks if the given vnode supports the requested operation.
 * The operation is specified by the offset returned by VOFFSET.
 *
 * XXX The test below used to determine this is quite fragile
 * because it relies on the file system to use genfs to specify
 * unimplemented operations.  There might be another way to do
 * it more cleanly.
 */
static bool
vnode_has_op(const struct vnode *vp, int opoffset)
{
	int (*defaultp)(void *);
	int (*opp)(void *);

	defaultp = vp->v_op[VOFFSET(vop_default)];
	opp = vp->v_op[opoffset];

	return opp != defaultp && opp != genfs_eopnotsupp &&
	    opp != genfs_badop && opp != genfs_nullop;
}

/*
 * Handles the read/write request given in 'bp' using the vnode's VOP_READ
 * and VOP_WRITE operations.
 *
 * 'obp' is a pointer to the original request fed to the vnd device.
 */
static void
handle_with_rdwr(struct vnd_softc *vnd, const struct buf *obp, struct buf *bp)
{
	bool doread;
	off_t offset;
	size_t len, resid;
	struct vnode *vp;
	int npages;

	doread = bp->b_flags & B_READ;
	offset = obp->b_rawblkno * vnd->sc_dkdev.dk_label->d_secsize;
	len = bp->b_bcount;
	vp = vnd->sc_vp;

#if defined(DEBUG)
	if (vnddebug & VDB_IO)
		printf("vnd (rdwr): vp %p, %s, rawblkno 0x%" PRIx64
		    ", secsize %d, offset %" PRIu64
		    ", bcount %d\n",
		    vp, doread ? "read" : "write", obp->b_rawblkno,
		    vnd->sc_dkdev.dk_label->d_secsize, offset,
		    bp->b_bcount);
#endif

	/* Issue the read or write operation. */
	bp->b_error =
	    vn_rdwr(doread ? UIO_READ : UIO_WRITE,
	    vp, bp->b_data, len, offset, UIO_SYSSPACE,
	    IO_ADV_ENCODE(POSIX_FADV_NOREUSE) | IO_DIRECT,
	    vnd->sc_cred, &resid, NULL);
	bp->b_resid = resid;

	/*
	 * Avoid caching too many pages, the vnd user
	 * is usually a filesystem and caches itself.
	 * We need some amount of caching to not hinder
	 * read-ahead and write-behind operations.
	 */
	npages = atomic_load_relaxed(&vp->v_uobj.uo_npages);
	if (npages > VND_MAXPAGES(vnd)) {
		rw_enter(vp->v_uobj.vmobjlock, RW_WRITER);
		(void) VOP_PUTPAGES(vp, 0, 0,
		    PGO_ALLPAGES | PGO_CLEANIT | PGO_FREE);
	}

	/* We need to increase the number of outputs on the vnode if
	 * there was any write to it. */
	if (!doread) {
		mutex_enter(vp->v_interlock);
		vp->v_numoutput++;
		mutex_exit(vp->v_interlock);
	}

	biodone(bp);
}

/*
 * Handles the read/write request given in 'bp' using the vnode's VOP_BMAP
 * and VOP_STRATEGY operations.
 *
 * 'obp' is a pointer to the original request fed to the vnd device.
 */
static void
handle_with_strategy(struct vnd_softc *vnd, const struct buf *obp,
    struct buf *bp)
{
	int bsize, error, flags, skipped;
	size_t resid, sz;
	off_t bn, offset;
	struct vnode *vp;
	struct buf *nbp = NULL;

	flags = obp->b_flags;


	/* convert to a byte offset within the file. */
	bn = obp->b_rawblkno * vnd->sc_dkdev.dk_label->d_secsize;

	bsize = vnd->sc_vp->v_mount->mnt_stat.f_iosize;
	skipped = 0;

	/*
	 * Break the request into bsize pieces and feed them
	 * sequentially using VOP_BMAP/VOP_STRATEGY.
	 * We do it this way to keep from flooding NFS servers if we
	 * are connected to an NFS file.  This places the burden on
	 * the client rather than the server.
	 */
	error = 0;
	bp->b_resid = bp->b_bcount;
	for (offset = 0, resid = bp->b_resid; /* true */;
	    resid -= sz, offset += sz) {
		daddr_t nbn;
		int off, nra;

		nra = 0;
		vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY);
		error = VOP_BMAP(vnd->sc_vp, bn / bsize, &vp, &nbn, &nra);
		VOP_UNLOCK(vnd->sc_vp);

		if (error == 0 && (long)nbn == -1)
			error = EIO;

		/*
		 * If there was an error or a hole in the file...punt.
		 * Note that we may have to wait for any operations
		 * that we have already fired off before releasing
		 * the buffer.
		 *
		 * XXX we could deal with holes here but it would be
		 * a hassle (in the write case).
		 */
		if (error) {
			skipped += resid;
			break;
		}

#ifdef DEBUG
		if (!dovndcluster)
			nra = 0;
#endif

		off = bn % bsize;
		sz = MIN(((off_t)1 + nra) * bsize - off, resid);
#ifdef DEBUG
		if (vnddebug & VDB_IO)
			printf("vndstrategy: vp %p/%p bn 0x%qx/0x%" PRIx64
			    " sz 0x%zx\n", vnd->sc_vp, vp, (long long)bn,
			    nbn, sz);
#endif

		nbp = getiobuf(vp, true);
		nestiobuf_setup(bp, nbp, offset, sz);
		nbp->b_blkno = nbn + btodb(off);

#if 0 /* XXX #ifdef DEBUG */
		if (vnddebug & VDB_IO)
			printf("vndstart(%ld): bp %p vp %p blkno "
			    "0x%" PRIx64 " flags %x addr %p cnt 0x%x\n",
			    (long) (vnd-vnd_softc), &nbp->vb_buf,
			    nbp->vb_buf.b_vp, nbp->vb_buf.b_blkno,
			    nbp->vb_buf.b_flags, nbp->vb_buf.b_data,
			    nbp->vb_buf.b_bcount);
#endif
		if (resid == sz) {
			break;
		}
		VOP_STRATEGY(vp, nbp);
		bn += sz;
	}
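	/*
	 * At this point every piece except the last has been issued; e.g.
	 * with an 8 KiB f_iosize and no read-ahead (nra == 0), a 64 KiB
	 * request is split into eight nested bufs.  The final piece is
	 * issued below, after the write accounting, so that biodone() on
	 * the parent buf cannot fire early.
	 */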
	if (!(flags & B_READ)) {
		struct vnode *w_vp;
		/*
		 * this is the last nested buf, account for
		 * the parent buf write too.
		 * This has to be done last, so that
		 * fsync won't wait for this write which
		 * has no chance to complete before all nested bufs
		 * have been queued.  But it has to be done
		 * before the last VOP_STRATEGY()
		 * or the call to nestiobuf_done().
		 */
		w_vp = bp->b_vp;
		mutex_enter(w_vp->v_interlock);
		w_vp->v_numoutput++;
		mutex_exit(w_vp->v_interlock);
	}
	KASSERT(skipped != 0 || nbp != NULL);
	if (skipped)
		nestiobuf_done(bp, skipped, error);
	else
		VOP_STRATEGY(vp, nbp);
}

static void
vndiodone(struct buf *bp)
{
	struct vndxfer *vnx = VND_BUFTOXFER(bp);
	struct vnd_softc *vnd = vnx->vx_vnd;
	struct buf *obp = bp->b_private;
	int s = splbio();

	KERNEL_LOCK(1, NULL);		/* XXXSMP */
	KASSERT(&vnx->vx_buf == bp);
	KASSERT(vnd->sc_active > 0);
#ifdef DEBUG
	if (vnddebug & VDB_IO) {
		printf("vndiodone1: bp %p iodone: error %d\n",
		    bp, bp->b_error);
	}
#endif
	disk_unbusy(&vnd->sc_dkdev, bp->b_bcount - bp->b_resid,
	    (bp->b_flags & B_READ));
	vnd->sc_active--;
	if (vnd->sc_active == 0) {
		wakeup(&vnd->sc_tab);
	}
	KERNEL_UNLOCK_ONE(NULL);	/* XXXSMP */
	splx(s);
	obp->b_error = bp->b_error;
	obp->b_resid = bp->b_resid;
	buf_destroy(bp);
	VND_PUTXFER(vnd, vnx);
	biodone(obp);
}

/* ARGSUSED */
static int
vndread(dev_t dev, struct uio *uio, int flags)
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndread(0x%"PRIx64", %p)\n", dev, uio);
#endif

	sc = device_lookup_private(&vnd_cd, unit);
	if (sc == NULL)
		return ENXIO;

	if ((sc->sc_flags & VNF_INITED) == 0)
		return ENXIO;

	return physio(vndstrategy, NULL, dev, B_READ, minphys, uio);
}

/* ARGSUSED */
static int
vndwrite(dev_t dev, struct uio *uio, int flags)
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndwrite(0x%"PRIx64", %p)\n", dev, uio);
#endif

	sc = device_lookup_private(&vnd_cd, unit);
	if (sc == NULL)
		return ENXIO;

	if ((sc->sc_flags & VNF_INITED) == 0)
		return ENXIO;

	return physio(vndstrategy, NULL, dev, B_WRITE, minphys, uio);
}

static int
vnd_cget(struct lwp *l, int unit, int *un, struct vattr *va)
{
	int error;
	struct vnd_softc *vnd;

	if (*un == -1)
		*un = unit;
	if (*un < 0)
		return EINVAL;

	vnd = device_lookup_private(&vnd_cd, *un);
	if (vnd == NULL)
		return -1;

	if ((vnd->sc_flags & VNF_INITED) == 0)
		return -1;

	vn_lock(vnd->sc_vp, LK_SHARED | LK_RETRY);
	error = VOP_GETATTR(vnd->sc_vp, va, l->l_cred);
	VOP_UNLOCK(vnd->sc_vp);
	return error;
}

static int
vnddoclear(struct vnd_softc *vnd, int pmask, int minor, bool force)
{
	int error;

	if ((error = vndlock(vnd)) != 0)
		return error;

	/*
	 * Don't unconfigure if any other partitions are open
	 * or if both the character and block flavors of this
	 * partition are open.
	 */
	if (DK_BUSY(vnd, pmask) && !force) {
		vndunlock(vnd);
		return EBUSY;
	}

	/* Delete all of our wedges */
	dkwedge_delall(&vnd->sc_dkdev);

	/*
	 * XXX vndclear() might call vndclose() implicitly;
	 * release lock to avoid recursion
	 *
	 * Set VNF_CLEARING to prevent vndopen() from
	 * sneaking in after we vndunlock().
	 */
	vnd->sc_flags |= VNF_CLEARING;
	vndunlock(vnd);
	vndclear(vnd, minor);
#ifdef DEBUG
	if (vnddebug & VDB_INIT)
		printf("%s: CLRed\n", __func__);
#endif

	/* Destroy the xfer and buffer pools. */
	pool_destroy(&vnd->sc_vxpool);

	/* Detach the disk. */
	disk_detach(&vnd->sc_dkdev);

	return 0;
}

static int
vndioctl_get(struct lwp *l, void *data, int unit, struct vattr *va)
{
	int error;

	KASSERT(l);

	/* the first member is always int vnd_unit in all the versions */
	if (*(int *)data >= vnd_cd.cd_ndevs)
		return ENXIO;

	switch (error = vnd_cget(l, unit, (int *)data, va)) {
	case -1:
		/* unused is not an error */
		memset(va, 0, sizeof(*va));
		/*FALLTHROUGH*/
	case 0:
		return 0;
	default:
		return error;
	}
}

/* ARGSUSED */
static int
vndioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
{
	bool force;
	int unit = vndunit(dev);
	struct vnd_softc *vnd;
	struct vnd_ioctl *vio;
	struct vattr vattr;
	struct pathbuf *pb;
	struct nameidata nd;
	int error, part, pmask;
	uint64_t geomsize;
	int fflags;
#ifdef __HAVE_OLD_DISKLABEL
	struct disklabel newlabel;
#endif

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndioctl(0x%"PRIx64", 0x%lx, %p, 0x%x, %p): unit %d\n",
		    dev, cmd, data, flag, l->l_proc, unit);
#endif
	/* Do the gets first; they don't need initialization or verification */
	switch (cmd) {
	case VNDIOCGET:
		if ((error = vndioctl_get(l, data, unit, &vattr)) != 0)
			return error;

		struct vnd_user *vnu = data;
		vnu->vnu_dev = vattr.va_fsid;
		vnu->vnu_ino = vattr.va_fileid;
		return 0;

	default:
		/* First check for COMPAT_50 hook */
		MODULE_HOOK_CALL(compat_vndioctl_50_hook,
		    (cmd, l, data, unit, &vattr, vndioctl_get),
		    enosys(), error);

		/*
		 * If not present, then COMPAT_30 hook also not
		 * present, so just continue with checks for the
		 * "write" commands
		 */
		if (error == ENOSYS) {
			error = 0;
			break;
		}

		/* If not already handled, try the COMPAT_30 hook */
		if (error == EPASSTHROUGH)
			MODULE_HOOK_CALL(compat_vndioctl_30_hook,
			    (cmd, l, data, unit, &vattr, vndioctl_get),
			    enosys(), error);

		/* If no COMPAT_30 module, or not handled, check writes */
		if (error == ENOSYS || error == EPASSTHROUGH) {
			error = 0;
			break;
		}
		return error;
	}

	vnd = device_lookup_private(&vnd_cd, unit);
	if (vnd == NULL)
		return ENXIO;
	vio = (struct vnd_ioctl *)data;

	/* Must be open for writes for these commands... */
	switch (cmd) {
	case VNDIOCSET50:
	case VNDIOCCLR50:
		if (!compat_vndioctl_50_hook.hooked)
			return EINVAL;
		/* FALLTHROUGH */
	case VNDIOCSET:
	case VNDIOCCLR:
	case DIOCSDINFO:
	case DIOCWDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCSDINFO:
	case ODIOCWDINFO:
#endif
	case DIOCKLABEL:
	case DIOCWLABEL:
	case DIOCCACHESYNC:
		if ((flag & FWRITE) == 0)
			return EBADF;
	}

	/* Must be initialized for these... */
	switch (cmd) {
	case VNDIOCCLR:
	case VNDIOCCLR50:
	case DIOCGDINFO:
	case DIOCSDINFO:
	case DIOCWDINFO:
	case DIOCGPARTINFO:
	case DIOCKLABEL:
	case DIOCWLABEL:
	case DIOCGDEFLABEL:
	case DIOCGCACHE:
	case DIOCGSTRATEGY:
	case DIOCCACHESYNC:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDINFO:
	case ODIOCSDINFO:
	case ODIOCWDINFO:
	case ODIOCGDEFLABEL:
#endif
		if ((vnd->sc_flags & VNF_INITED) == 0)
			return ENXIO;
	}

	error = disk_ioctl(&vnd->sc_dkdev, dev, cmd, data, flag, l);
	if (error != EPASSTHROUGH)
		return error;


	switch (cmd) {
	case VNDIOCSET50:
	case VNDIOCSET:
		if (vnd->sc_flags & VNF_INITED)
			return EBUSY;

		if ((error = vndlock(vnd)) != 0)
			return error;

		fflags = FREAD;
		if ((vio->vnd_flags & VNDIOF_READONLY) == 0)
			fflags |= FWRITE;
		if ((vio->vnd_flags & VNDIOF_FILEIO) != 0)
			vnd->sc_flags |= VNF_USE_VN_RDWR;
		error = pathbuf_copyin(vio->vnd_file, &pb);
		if (error) {
			goto unlock_and_exit;
		}
		NDINIT(&nd, LOOKUP, FOLLOW, pb);
		if ((error = vn_open(&nd, fflags, 0)) != 0) {
			pathbuf_destroy(pb);
			goto unlock_and_exit;
		}
		KASSERT(l);
		error = VOP_GETATTR(nd.ni_vp, &vattr, l->l_cred);
		if (!error && nd.ni_vp->v_type != VREG)
			error = EOPNOTSUPP;
		if (!error && vattr.va_bytes < vattr.va_size)
			/* File is definitely sparse, use vn_rdwr() */
			vnd->sc_flags |= VNF_USE_VN_RDWR;
		if (error) {
			VOP_UNLOCK(nd.ni_vp);
			goto close_and_exit;
		}

		/* If using a compressed file, initialize its info */
		/* (or abort with an error if kernel has no compression) */
		if (vio->vnd_flags & VNDIOF_COMP) {
#ifdef VND_COMPRESSION
			struct vnd_comp_header *ch;
			int i;
			uint32_t comp_size;
			uint32_t comp_maxsize;

			/* allocate space for compressed file header */
			ch = malloc(sizeof(struct vnd_comp_header),
			    M_TEMP, M_WAITOK);

			/* read compressed file header */
			error = vn_rdwr(UIO_READ, nd.ni_vp, (void *)ch,
			    sizeof(struct vnd_comp_header), 0, UIO_SYSSPACE,
			    IO_UNIT|IO_NODELOCKED, l->l_cred, NULL, NULL);
			if (error) {
				free(ch, M_TEMP);
				VOP_UNLOCK(nd.ni_vp);
				goto close_and_exit;
			}

			if (be32toh(ch->block_size) == 0 ||
			    be32toh(ch->num_blocks) > UINT32_MAX - 1) {
				free(ch, M_TEMP);
				VOP_UNLOCK(nd.ni_vp);
				goto close_and_exit;
			}

			/* save some header info */
			vnd->sc_comp_blksz = be32toh(ch->block_size);
			/* note last offset is the file byte size */
			vnd->sc_comp_numoffs = be32toh(ch->num_blocks) + 1;
			free(ch, M_TEMP);
			if (!DK_DEV_BSIZE_OK(vnd->sc_comp_blksz)) {
				VOP_UNLOCK(nd.ni_vp);
				error = EINVAL;
				goto close_and_exit;
			}
			KASSERT(0 < vnd->sc_comp_blksz);
			KASSERT(0 < vnd->sc_comp_numoffs);
			/*
			 * @#^@!$& gcc -Wtype-limits refuses to let me
			 * write SIZE_MAX/sizeof(uint64_t) < numoffs,
			 * because the range of the type on amd64 makes
			 * the comparisons always false.
			 */
#if SIZE_MAX <= UINT32_MAX*(64/CHAR_BIT)
			if (SIZE_MAX/sizeof(uint64_t) < vnd->sc_comp_numoffs) {
				VOP_UNLOCK(nd.ni_vp);
				error = EINVAL;
				goto close_and_exit;
			}
#endif
			if ((vattr.va_size < sizeof(struct vnd_comp_header)) ||
			    (vattr.va_size - sizeof(struct vnd_comp_header) <
			     sizeof(uint64_t)*vnd->sc_comp_numoffs) ||
			    (UQUAD_MAX/vnd->sc_comp_blksz <
			     vnd->sc_comp_numoffs - 1)) {
				VOP_UNLOCK(nd.ni_vp);
				error = EINVAL;
				goto close_and_exit;
			}

			/* set decompressed file size */
			KASSERT(vnd->sc_comp_numoffs - 1 <=
			    UQUAD_MAX/vnd->sc_comp_blksz);
			vattr.va_size =
			    ((u_quad_t)vnd->sc_comp_numoffs - 1) *
			    (u_quad_t)vnd->sc_comp_blksz;

			/* allocate space for all the compressed offsets */
			__CTASSERT(UINT32_MAX <= UQUAD_MAX/sizeof(uint64_t));
			vnd->sc_comp_offsets =
			    malloc(sizeof(uint64_t) * vnd->sc_comp_numoffs,
			    M_DEVBUF, M_WAITOK);

			/* read in the offsets */
			error = vn_rdwr(UIO_READ, nd.ni_vp,
			    (void *)vnd->sc_comp_offsets,
			    sizeof(uint64_t) * vnd->sc_comp_numoffs,
			    sizeof(struct vnd_comp_header), UIO_SYSSPACE,
			    IO_UNIT|IO_NODELOCKED, l->l_cred, NULL, NULL);
			if (error) {
				VOP_UNLOCK(nd.ni_vp);
				goto close_and_exit;
			}
			/*
			 * find largest block size (used for allocation limit).
			 * Also convert offset to native byte order.
			 */
			comp_maxsize = 0;
			for (i = 0; i < vnd->sc_comp_numoffs - 1; i++) {
				vnd->sc_comp_offsets[i] =
				    be64toh(vnd->sc_comp_offsets[i]);
				comp_size =
				    be64toh(vnd->sc_comp_offsets[i + 1])
				    - vnd->sc_comp_offsets[i];
				if (comp_size > comp_maxsize)
					comp_maxsize = comp_size;
			}
			vnd->sc_comp_offsets[vnd->sc_comp_numoffs - 1] =
			    be64toh(vnd->sc_comp_offsets[vnd->sc_comp_numoffs
			    - 1]);

			/* create compressed data buffer */
			vnd->sc_comp_buff = malloc(comp_maxsize,
			    M_DEVBUF, M_WAITOK);

			/* create decompressed buffer */
			vnd->sc_comp_decombuf = malloc(vnd->sc_comp_blksz,
			    M_DEVBUF, M_WAITOK);
			vnd->sc_comp_buffblk = -1;

			/* Initialize decompress stream */
			memset(&vnd->sc_comp_stream, 0, sizeof(z_stream));
			vnd->sc_comp_stream.zalloc = vnd_alloc;
			vnd->sc_comp_stream.zfree = vnd_free;
			error = inflateInit2(&vnd->sc_comp_stream, MAX_WBITS);
			if (error) {
				if (vnd->sc_comp_stream.msg)
					printf("vnd%d: compressed file, %s\n",
					    unit, vnd->sc_comp_stream.msg);
				VOP_UNLOCK(nd.ni_vp);
				error = EINVAL;
				goto close_and_exit;
			}

			vnd->sc_flags |= VNF_COMP | VNF_READONLY;
#else /* !VND_COMPRESSION */
			VOP_UNLOCK(nd.ni_vp);
			error = EOPNOTSUPP;
			goto close_and_exit;
#endif /* VND_COMPRESSION */
		}

		VOP_UNLOCK(nd.ni_vp);
		vnd->sc_vp = nd.ni_vp;
		vnd->sc_size = btodb(vattr.va_size);	/* note truncation */

		/* get smallest I/O size for underlying device, fall back to
		 * fundamental I/O size of underlying filesystem
		 */
		error = bdev_ioctl(vattr.va_fsid, DIOCGSECTORSIZE,
		    &vnd->sc_iosize, FKIOCTL, l);
		if (error)
			vnd->sc_iosize = vnd->sc_vp->v_mount->mnt_stat.f_frsize;

		/*
		 * Use the pseudo-geometry specified.  If none was provided,
		 * use "standard" Adaptec fictitious geometry.
		 */
		if (vio->vnd_flags & VNDIOF_HASGEOM) {

			memcpy(&vnd->sc_geom, &vio->vnd_geom,
			    sizeof(vio->vnd_geom));

			/*
			 * Sanity-check the sector size.
			 */
			if (!DK_DEV_BSIZE_OK(vnd->sc_geom.vng_secsize) ||
			    vnd->sc_geom.vng_ntracks == 0 ||
			    vnd->sc_geom.vng_nsectors == 0) {
				error = EINVAL;
				goto close_and_exit;
			}

			/*
			 * Compute missing cylinder count from size
			 */
			if (vnd->sc_geom.vng_ncylinders == 0)
				vnd->sc_geom.vng_ncylinders = vnd->sc_size / (
				    (vnd->sc_geom.vng_secsize / DEV_BSIZE) *
				    vnd->sc_geom.vng_ntracks *
				    vnd->sc_geom.vng_nsectors);

			/*
			 * Compute the size (in DEV_BSIZE blocks) specified
			 * by the geometry.
			 */
			geomsize = (int64_t)vnd->sc_geom.vng_nsectors *
			    vnd->sc_geom.vng_ntracks *
			    vnd->sc_geom.vng_ncylinders *
			    (vnd->sc_geom.vng_secsize / DEV_BSIZE);

			/*
			 * Sanity-check the size against the specified
			 * geometry.
			 */
			if (vnd->sc_size < geomsize) {
				error = EINVAL;
				goto close_and_exit;
			}
		} else if (vnd->sc_size >= (32 * 64)) {
			/*
			 * Size must be at least 2048 DEV_BSIZE blocks
			 * (1M) in order to use this geometry.
			 */
			vnd->sc_geom.vng_secsize = DEV_BSIZE;
			vnd->sc_geom.vng_nsectors = 32;
			vnd->sc_geom.vng_ntracks = 64;
			vnd->sc_geom.vng_ncylinders = vnd->sc_size / (64 * 32);
		} else {
			vnd->sc_geom.vng_secsize = DEV_BSIZE;
			vnd->sc_geom.vng_nsectors = 1;
			vnd->sc_geom.vng_ntracks = 1;
			vnd->sc_geom.vng_ncylinders = vnd->sc_size;
		}

		vnd_set_geometry(vnd);

		if (vio->vnd_flags & VNDIOF_READONLY) {
			vnd->sc_flags |= VNF_READONLY;
		}

		if ((error = vndsetcred(vnd, l->l_cred)) != 0)
			goto close_and_exit;

		vndthrottle(vnd, vnd->sc_vp);
		vio->vnd_osize = dbtob(vnd->sc_size);
		if (cmd != VNDIOCSET50)
			vio->vnd_size = dbtob(vnd->sc_size);
		vnd->sc_flags |= VNF_INITED;

		/* create the kernel thread, wait for it to be up */
		error = kthread_create(PRI_NONE, 0, NULL, vndthread, vnd,
		    &vnd->sc_kthread, "%s", device_xname(vnd->sc_dev));
		if (error)
			goto close_and_exit;
		while ((vnd->sc_flags & VNF_KTHREAD) == 0) {
			tsleep(&vnd->sc_kthread, PRIBIO, "vndthr", 0);
		}
#ifdef DEBUG
		if (vnddebug & VDB_INIT)
			printf("vndioctl: SET vp %p size 0x%lx %d/%d/%d/%d\n",
			    vnd->sc_vp, (unsigned long) vnd->sc_size,
			    vnd->sc_geom.vng_secsize,
			    vnd->sc_geom.vng_nsectors,
			    vnd->sc_geom.vng_ntracks,
			    vnd->sc_geom.vng_ncylinders);
#endif

		/* Attach the disk. */
		disk_attach(&vnd->sc_dkdev);

		/* Initialize the xfer and buffer pools. */
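		/* (The xfer pool is destroyed again in vnddoclear().) */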
		pool_init(&vnd->sc_vxpool, sizeof(struct vndxfer), 0,
		    0, 0, "vndxpl", NULL, IPL_BIO);

		vndunlock(vnd);

		pathbuf_destroy(pb);

		/* Discover wedges on this disk */
		dkwedge_discover(&vnd->sc_dkdev);

		break;

close_and_exit:
		(void) vn_close(nd.ni_vp, fflags, l->l_cred);
		pathbuf_destroy(pb);
unlock_and_exit:
#ifdef VND_COMPRESSION
		/* free any allocated memory (for compressed file) */
		if (vnd->sc_comp_offsets) {
			free(vnd->sc_comp_offsets, M_DEVBUF);
			vnd->sc_comp_offsets = NULL;
		}
		if (vnd->sc_comp_buff) {
			free(vnd->sc_comp_buff, M_DEVBUF);
			vnd->sc_comp_buff = NULL;
		}
		if (vnd->sc_comp_decombuf) {
			free(vnd->sc_comp_decombuf, M_DEVBUF);
			vnd->sc_comp_decombuf = NULL;
		}
#endif /* VND_COMPRESSION */
		vndunlock(vnd);
		return error;

	case VNDIOCCLR50:
	case VNDIOCCLR:
		part = DISKPART(dev);
		pmask = (1 << part);
		force = (vio->vnd_flags & VNDIOF_FORCE) != 0;

		if ((error = vnddoclear(vnd, pmask, minor(dev), force)) != 0)
			return error;

		break;


	case DIOCWDINFO:
	case DIOCSDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCWDINFO:
	case ODIOCSDINFO:
#endif
	{
		struct disklabel *lp;

		if ((error = vndlock(vnd)) != 0)
			return error;

		vnd->sc_flags |= VNF_LABELLING;

#ifdef __HAVE_OLD_DISKLABEL
		if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
			memset(&newlabel, 0, sizeof newlabel);
			memcpy(&newlabel, data, sizeof (struct olddisklabel));
			lp = &newlabel;
		} else
#endif
		lp = (struct disklabel *)data;

		error = setdisklabel(vnd->sc_dkdev.dk_label,
		    lp, 0, vnd->sc_dkdev.dk_cpulabel);
		if (error == 0) {
			if (cmd == DIOCWDINFO
#ifdef __HAVE_OLD_DISKLABEL
			    || cmd == ODIOCWDINFO
#endif
			   )
				error = writedisklabel(VNDLABELDEV(dev),
				    vndstrategy, vnd->sc_dkdev.dk_label,
				    vnd->sc_dkdev.dk_cpulabel);
		}

		vnd->sc_flags &= ~VNF_LABELLING;

		vndunlock(vnd);

		if (error)
			return error;
		break;
	}

	case DIOCKLABEL:
		if (*(int *)data != 0)
			vnd->sc_flags |= VNF_KLABEL;
		else
			vnd->sc_flags &= ~VNF_KLABEL;
		break;

	case DIOCWLABEL:
		if (*(int *)data != 0)
			vnd->sc_flags |= VNF_WLABEL;
		else
			vnd->sc_flags &= ~VNF_WLABEL;
		break;

	case DIOCGDEFLABEL:
		vndgetdefaultlabel(vnd, (struct disklabel *)data);
		break;

#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDEFLABEL:
		vndgetdefaultlabel(vnd, &newlabel);
		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
			return ENOTTY;
		memcpy(data, &newlabel, sizeof (struct olddisklabel));
		break;
#endif

	case DIOCGSTRATEGY:
	    {
		struct disk_strategy *dks = (void *)data;

		/* No lock needed, never changed */
		strlcpy(dks->dks_name,
		    bufq_getstrategyname(vnd->sc_tab),
		    sizeof(dks->dks_name));
		dks->dks_paramlen = 0;
		break;
	    }
	case DIOCGCACHE:
	    {
		int *bits = (int *)data;
		*bits |= DKCACHE_READ | DKCACHE_WRITE;
		break;
	    }
	case DIOCCACHESYNC:
		vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY);
		error = VOP_FSYNC(vnd->sc_vp, vnd->sc_cred,
		    FSYNC_WAIT | FSYNC_DATAONLY | FSYNC_CACHE, 0, 0);
		VOP_UNLOCK(vnd->sc_vp);
		return error;

	default:
		return ENOTTY;
	}

	return 0;
}
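
/*
 * Example (sketch): configuring a unit from userland, roughly what
 * vndconfig(8) does via VNDIOCSET.  The device path assumes a port
 * where RAW_PART is 'd'; the image path is hypothetical.
 *
 *	struct vnd_ioctl vio;
 *
 *	memset(&vio, 0, sizeof(vio));
 *	vio.vnd_file = "/var/tmp/image.fs";
 *	fd = open("/dev/rvnd0d", O_RDWR, 0);
 *	if (ioctl(fd, VNDIOCSET, &vio) == -1)
 *		err(1, "VNDIOCSET");
 */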
/*
 * Duplicate the current process's credentials.  Since we are called only
 * as the result of a SET ioctl and only root can do that, any future access
 * to this "disk" is essentially as root.  Note that credentials may change
 * if some other uid can write directly to the mapped file (NFS).
 */
static int
vndsetcred(struct vnd_softc *vnd, kauth_cred_t cred)
{
	struct uio auio;
	struct iovec aiov;
	char *tmpbuf;
	int error;

	vnd->sc_cred = kauth_cred_dup(cred);
	tmpbuf = malloc(DEV_BSIZE, M_TEMP, M_WAITOK);

	/* XXX: Horrible kludge to establish credentials for NFS */
	aiov.iov_base = tmpbuf;
	aiov.iov_len = uimin(DEV_BSIZE, dbtob(vnd->sc_size));
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = 0;
	auio.uio_rw = UIO_READ;
	auio.uio_resid = aiov.iov_len;
	UIO_SETUP_SYSSPACE(&auio);
	vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY);
	error = VOP_READ(vnd->sc_vp, &auio, 0, vnd->sc_cred);
	if (error == 0) {
		/*
		 * Because vnd does all IO directly through the vnode
		 * we need to flush (at least) the buffer from the above
		 * VOP_READ from the buffer cache to prevent cache
		 * incoherencies.  Also, be careful to write dirty
		 * buffers back to stable storage.
		 */
		error = vinvalbuf(vnd->sc_vp, V_SAVE, vnd->sc_cred,
		    curlwp, 0, 0);
	}
	VOP_UNLOCK(vnd->sc_vp);

	free(tmpbuf, M_TEMP);
	return error;
}

/*
 * Set maxactive based on FS type
 */
static void
vndthrottle(struct vnd_softc *vnd, struct vnode *vp)
{

	if (vp->v_tag == VT_NFS)
		vnd->sc_maxactive = 2;
	else
		vnd->sc_maxactive = 8;

	if (vnd->sc_maxactive < 1)
		vnd->sc_maxactive = 1;
}

#if 0
static void
vndshutdown(void)
{
	struct vnd_softc *vnd;

	for (vnd = &vnd_softc[0]; vnd < &vnd_softc[numvnd]; vnd++)
		if (vnd->sc_flags & VNF_INITED)
			vndclear(vnd);
}
#endif

static void
vndclear(struct vnd_softc *vnd, int myminor)
{
	struct vnode *vp = vnd->sc_vp;
	int fflags = FREAD;
	int bmaj, cmaj, i, mn;
	int s;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndclear(%p): vp %p\n", vnd, vp);
#endif
	/* locate the major number */
	bmaj = bdevsw_lookup_major(&vnd_bdevsw);
	cmaj = cdevsw_lookup_major(&vnd_cdevsw);

	/* Nuke the vnodes for any open instances */
	for (i = 0; i < MAXPARTITIONS; i++) {
		mn = DISKMINOR(device_unit(vnd->sc_dev), i);
		vdevgone(bmaj, mn, mn, VBLK);
		if (mn != myminor) /* XXX avoid killing our own vnode */
			vdevgone(cmaj, mn, mn, VCHR);
	}

	if ((vnd->sc_flags & VNF_READONLY) == 0)
		fflags |= FWRITE;

	s = splbio();
	bufq_drain(vnd->sc_tab);
	splx(s);

	vnd->sc_flags |= VNF_VUNCONF;
	wakeup(&vnd->sc_tab);
	while (vnd->sc_flags & VNF_KTHREAD)
		tsleep(&vnd->sc_kthread, PRIBIO, "vnthr", 0);

#ifdef VND_COMPRESSION
	/* free the compressed file buffers */
	if (vnd->sc_flags & VNF_COMP) {
		if (vnd->sc_comp_offsets) {
			free(vnd->sc_comp_offsets, M_DEVBUF);
			vnd->sc_comp_offsets = NULL;
		}
		if (vnd->sc_comp_buff) {
			free(vnd->sc_comp_buff, M_DEVBUF);
			vnd->sc_comp_buff = NULL;
		}
		if (vnd->sc_comp_decombuf) {
			free(vnd->sc_comp_decombuf, M_DEVBUF);
			vnd->sc_comp_decombuf = NULL;
		}
	}
#endif /* VND_COMPRESSION */
	vnd->sc_flags &=
	    ~(VNF_INITED | VNF_READONLY | VNF_KLABEL | VNF_VLABEL
	      | VNF_VUNCONF | VNF_COMP | VNF_CLEARING);
	if (vp == NULL)
		panic("vndclear: null vp");
	(void) vn_close(vp, fflags, vnd->sc_cred);
	kauth_cred_free(vnd->sc_cred);
	vnd->sc_vp = NULL;
	vnd->sc_cred = NULL;
	vnd->sc_size = 0;
}

static int
vndsize(dev_t dev)
{
	struct vnd_softc *sc;
	struct disklabel *lp;
	int part, unit, omask;
	int size;

	unit = vndunit(dev);
	sc = device_lookup_private(&vnd_cd, unit);
	if (sc == NULL)
		return -1;

	if ((sc->sc_flags & VNF_INITED) == 0)
		return -1;

	part = DISKPART(dev);
	omask = sc->sc_dkdev.dk_openmask & (1 << part);
	lp = sc->sc_dkdev.dk_label;

	if (omask == 0 && vndopen(dev, 0, S_IFBLK, curlwp))	/* XXX */
		return -1;

	if (lp->d_partitions[part].p_fstype != FS_SWAP)
		size = -1;
	else
		size = lp->d_partitions[part].p_size *
		    (lp->d_secsize / DEV_BSIZE);

	if (omask == 0 && vndclose(dev, 0, S_IFBLK, curlwp))	/* XXX */
		return -1;

	return size;
}

static int
vnddump(dev_t dev, daddr_t blkno, void *va,
    size_t size)
{

	/* Not implemented. */
	return ENXIO;
}

static void
vndgetdefaultlabel(struct vnd_softc *sc, struct disklabel *lp)
{
	struct vndgeom *vng = &sc->sc_geom;
	struct partition *pp;
	unsigned spb;

	memset(lp, 0, sizeof(*lp));

	spb = vng->vng_secsize / DEV_BSIZE;
	if (sc->sc_size / spb > UINT32_MAX)
		lp->d_secperunit = UINT32_MAX;
	else
		lp->d_secperunit = sc->sc_size / spb;
	lp->d_secsize = vng->vng_secsize;
	lp->d_nsectors = vng->vng_nsectors;
	lp->d_ntracks = vng->vng_ntracks;
	lp->d_ncylinders = vng->vng_ncylinders;
	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;

	strncpy(lp->d_typename, "vnd", sizeof(lp->d_typename));
	lp->d_type = DKTYPE_VND;
	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
	lp->d_rpm = 3600;
	lp->d_interleave = 1;
	lp->d_flags = 0;

	pp = &lp->d_partitions[RAW_PART];
	pp->p_offset = 0;
	pp->p_size = lp->d_secperunit;
	pp->p_fstype = FS_UNUSED;
	lp->d_npartitions = RAW_PART + 1;

	lp->d_magic = DISKMAGIC;
	lp->d_magic2 = DISKMAGIC;
	lp->d_checksum = dkcksum(lp);
}

/*
 * Read the disklabel from a vnd.  If one is not present, create a fake one.
 */
static void
vndgetdisklabel(dev_t dev, struct vnd_softc *sc)
{
	const char *errstring;
	struct disklabel *lp = sc->sc_dkdev.dk_label;
	struct cpu_disklabel *clp = sc->sc_dkdev.dk_cpulabel;
	int i;

	memset(clp, 0, sizeof(*clp));

	vndgetdefaultlabel(sc, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(VNDLABELDEV(dev), vndstrategy, lp, clp);
	if (errstring) {
		/*
		 * Lack of disklabel is common, but we print the warning
		 * anyway, since it might contain other useful information.
		 */
		aprint_normal_dev(sc->sc_dev, "%s\n", errstring);

		/*
		 * For historical reasons, if there's no disklabel
		 * present, all partitions must be FS_BSDFFS and
		 * occupy the entire disk.
		 */
		for (i = 0; i < MAXPARTITIONS; i++) {
			/*
			 * Don't wipe out port specific hack (such as
			 * dos partition hack of i386 port).
			 */
			if (lp->d_partitions[i].p_size != 0)
				continue;

			lp->d_partitions[i].p_size = lp->d_secperunit;
			lp->d_partitions[i].p_offset = 0;
			lp->d_partitions[i].p_fstype = FS_BSDFFS;
		}

		strncpy(lp->d_packname, "default label",
		    sizeof(lp->d_packname));

		lp->d_npartitions = MAXPARTITIONS;
		lp->d_checksum = dkcksum(lp);
	}
}

/*
 * Wait interruptibly for an exclusive lock.
 *
 * XXX
 * Several drivers do this; it should be abstracted and made MP-safe.
 */
static int
vndlock(struct vnd_softc *sc)
{
	int error;

	while ((sc->sc_flags & VNF_LOCKED) != 0) {
		sc->sc_flags |= VNF_WANTED;
		if ((error = tsleep(sc, PRIBIO | PCATCH, "vndlck", 0)) != 0)
			return error;
	}
	sc->sc_flags |= VNF_LOCKED;
	return 0;
}

/*
 * Unlock and wake up any waiters.
 */
static void
vndunlock(struct vnd_softc *sc)
{

	sc->sc_flags &= ~VNF_LOCKED;
	if ((sc->sc_flags & VNF_WANTED) != 0) {
		sc->sc_flags &= ~VNF_WANTED;
		wakeup(sc);
	}
}

#ifdef VND_COMPRESSION
/* compressed file read */
static void
compstrategy(struct buf *bp, off_t bn)
{
	int error;
	int unit = vndunit(bp->b_dev);
	struct vnd_softc *vnd =
	    device_lookup_private(&vnd_cd, unit);
	u_int32_t comp_block;
	struct uio auio;
	char *addr;
	int s;

	/* set up constants for data move */
	auio.uio_rw = UIO_READ;
	UIO_SETUP_SYSSPACE(&auio);

	/* read, and transfer the data */
	addr = bp->b_data;
	bp->b_resid = bp->b_bcount;
	s = splbio();
	while (bp->b_resid > 0) {
		unsigned length;
		size_t length_in_buffer;
		u_int32_t offset_in_buffer;
		struct iovec aiov;

		/* calculate the compressed block number */
		comp_block = bn / (off_t)vnd->sc_comp_blksz;

		/* check for good block number */
		if (comp_block >= vnd->sc_comp_numoffs) {
			bp->b_error = EINVAL;
			splx(s);
			return;
		}

		/* read in the compressed block, if not in buffer */
		if (comp_block != vnd->sc_comp_buffblk) {
			length = vnd->sc_comp_offsets[comp_block + 1] -
			    vnd->sc_comp_offsets[comp_block];
			vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY);
			error = vn_rdwr(UIO_READ, vnd->sc_vp, vnd->sc_comp_buff,
			    length, vnd->sc_comp_offsets[comp_block],
			    UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, vnd->sc_cred,
			    NULL, NULL);
			if (error) {
				bp->b_error = error;
				VOP_UNLOCK(vnd->sc_vp);
				splx(s);
				return;
			}
			/* uncompress the buffer */
			vnd->sc_comp_stream.next_in = vnd->sc_comp_buff;
			vnd->sc_comp_stream.avail_in = length;
			vnd->sc_comp_stream.next_out = vnd->sc_comp_decombuf;
			vnd->sc_comp_stream.avail_out = vnd->sc_comp_blksz;
			inflateReset(&vnd->sc_comp_stream);
			error = inflate(&vnd->sc_comp_stream, Z_FINISH);
			if (error != Z_STREAM_END) {
				if (vnd->sc_comp_stream.msg)
					aprint_normal_dev(vnd->sc_dev,
					    "compressed file, %s\n",
					    vnd->sc_comp_stream.msg);
				bp->b_error = EBADMSG;
				VOP_UNLOCK(vnd->sc_vp);
				splx(s);
				return;
			}
			vnd->sc_comp_buffblk = comp_block;
			VOP_UNLOCK(vnd->sc_vp);
		}

		/* transfer the usable uncompressed data */
		offset_in_buffer = bn % (off_t)vnd->sc_comp_blksz;
		length_in_buffer = vnd->sc_comp_blksz - offset_in_buffer;
		if (length_in_buffer > bp->b_resid)
			length_in_buffer = bp->b_resid;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		aiov.iov_base = addr;
		aiov.iov_len = length_in_buffer;
		auio.uio_resid = aiov.iov_len;
		auio.uio_offset = 0;
		error = uiomove(vnd->sc_comp_decombuf + offset_in_buffer,
		    length_in_buffer, &auio);
		if (error) {
			bp->b_error = error;
			splx(s);
			return;
		}

		bn += length_in_buffer;
		addr += length_in_buffer;
		bp->b_resid -= length_in_buffer;
	}
	splx(s);
}

/* compression memory allocation routines */
static void *
vnd_alloc(void *aux, u_int items, u_int siz)
{
	return malloc(items * siz, M_TEMP, M_NOWAIT);
}

static void
vnd_free(void *aux, void *ptr)
{
	free(ptr, M_TEMP);
}
#endif /* VND_COMPRESSION */

static void
vnd_set_geometry(struct vnd_softc *vnd)
{
	struct disk_geom *dg = &vnd->sc_dkdev.dk_geom;

	memset(dg, 0, sizeof(*dg));

	dg->dg_secperunit = (int64_t)vnd->sc_geom.vng_nsectors *
	    vnd->sc_geom.vng_ntracks * vnd->sc_geom.vng_ncylinders;
	dg->dg_secsize = vnd->sc_geom.vng_secsize;
	dg->dg_nsectors = vnd->sc_geom.vng_nsectors;
	dg->dg_ntracks = vnd->sc_geom.vng_ntracks;
	dg->dg_ncylinders = vnd->sc_geom.vng_ncylinders;

#ifdef DEBUG
	if (vnddebug & VDB_LABEL) {
		printf("dg->dg_secperunit: %" PRId64 "\n", dg->dg_secperunit);
		printf("dg->dg_ncylinders: %u\n", dg->dg_ncylinders);
	}
#endif
	disk_set_info(vnd->sc_dev, &vnd->sc_dkdev, NULL);
}

#ifdef VND_COMPRESSION
#define VND_DEPENDS "zlib"
#else
#define VND_DEPENDS NULL
#endif

MODULE(MODULE_CLASS_DRIVER, vnd, VND_DEPENDS);

#ifdef _MODULE
int vnd_bmajor = -1, vnd_cmajor = -1;

CFDRIVER_DECL(vnd, DV_DISK, NULL);
#endif

static int
vnd_modcmd(modcmd_t cmd, void *arg)
{
	int error = 0;

	switch (cmd) {
	case MODULE_CMD_INIT:
#ifdef _MODULE
		error = config_cfdriver_attach(&vnd_cd);
		if (error)
			break;

		error = config_cfattach_attach(vnd_cd.cd_name, &vnd_ca);
		if (error) {
			config_cfdriver_detach(&vnd_cd);
#ifdef DIAGNOSTIC
			aprint_error("%s: unable to register cfattach for "
			    "%s, error %d\n", __func__, vnd_cd.cd_name, error);
#endif
			break;
		}

		/*
		 * Attach the {b,c}devsw's
		 */
		error = devsw_attach("vnd", &vnd_bdevsw, &vnd_bmajor,
		    &vnd_cdevsw, &vnd_cmajor);
		/*
		 * If devsw_attach fails, remove from autoconf database
		 */
		if (error) {
			config_cfattach_detach(vnd_cd.cd_name, &vnd_ca);
			config_cfdriver_detach(&vnd_cd);
#ifdef DIAGNOSTIC
			aprint_error("%s: unable to attach %s devsw, "
			    "error %d\n", __func__, vnd_cd.cd_name, error);
#endif
			break;
		}
#endif
		break;

	case MODULE_CMD_FINI:
#ifdef _MODULE
		/*
		 * Remove {b,c}devsw's
		 */
		devsw_detach(&vnd_bdevsw, &vnd_cdevsw);

		/*
		 * Now remove device from autoconf database
		 */
		error = config_cfattach_detach(vnd_cd.cd_name, &vnd_ca);
		if (error) {
			(void)devsw_attach("vnd", &vnd_bdevsw, &vnd_bmajor,
			    &vnd_cdevsw, &vnd_cmajor);
#ifdef DIAGNOSTIC
			aprint_error("%s: failed to detach %s cfattach, "
			    "error %d\n", __func__, vnd_cd.cd_name, error);
#endif
			break;
		}
		error = config_cfdriver_detach(&vnd_cd);
		if (error) {
			(void)config_cfattach_attach(vnd_cd.cd_name, &vnd_ca);
			(void)devsw_attach("vnd", &vnd_bdevsw, &vnd_bmajor,
			    &vnd_cdevsw, &vnd_cmajor);
#ifdef DIAGNOSTIC
			aprint_error("%s: failed to detach %s cfdriver, "
			    "error %d\n", __func__, vnd_cd.cd_name, error);
			break;
#endif
		}
#endif
		break;

	case MODULE_CMD_STAT:
		return ENOTTY;

	default:
		return ENOTTY;
	}

	return error;
}