/*	$NetBSD: vnd.c,v 1.290 2024/08/15 21:08:20 mlelstv Exp $	*/

/*-
 * Copyright (c) 1996, 1997, 1998, 2008, 2020 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vn.c 1.13 94/04/02$
 *
 *	@(#)vn.c	8.9 (Berkeley) 5/14/95
 */

/*
 * Vnode disk driver.
 *
 * Block/character interface to a vnode.  Allows one to treat a file
 * as a disk (e.g. build a filesystem in it, mount it, etc.).
 *
 * NOTE 1: If the vnode supports the VOP_BMAP and VOP_STRATEGY operations,
 * this uses them to avoid distorting the local buffer cache.  If those
 * block-level operations are not available, this falls back to the regular
 * read and write calls.  Using these may distort the cache in some cases,
 * but it is better to have the driver working than to prevent it from
 * working on file systems where the block-level operations are not
 * implemented for whatever reason.
 *
 * NOTE 2: There is a security issue involved with this driver.
 * Once mounted, all access to the contents of the "mapped" file via
 * the special file is controlled by the permissions on the special
 * file; the protection of the mapped file is ignored (effectively,
 * by using root credentials in all transactions).
 *
 * NOTE 3: Doesn't interact with leases, should it?
 */
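
/*
 * Illustrative userland sketch (not part of this driver, and normally
 * done via vndconfig(8)): configuring a unit through the VNDIOCSET
 * ioctl handled below.  The device path and the error handling are
 * assumptions for the example only.
 */
#if 0
#include <sys/ioctl.h>
#include <dev/vndvar.h>
#include <fcntl.h>
#include <string.h>

static int
configure_vnd(const char *rawdev, char *backing_file)
{
	struct vnd_ioctl vio;
	int fd;

	fd = open(rawdev, O_RDWR);	/* e.g. "/dev/rvnd0d" */
	if (fd == -1)
		return -1;

	memset(&vio, 0, sizeof(vio));
	vio.vnd_file = backing_file;	/* path of the file to map */

	if (ioctl(fd, VNDIOCSET, &vio) == -1)
		return -1;
	return fd;
}
#endif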

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vnd.c,v 1.290 2024/08/15 21:08:20 mlelstv Exp $");

#if defined(_KERNEL_OPT)
#include "opt_vnd.h"
#include "opt_compat_netbsd.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/proc.h>
#include <sys/kthread.h>
#include <sys/errno.h>
#include <sys/buf.h>
#include <sys/bufq.h>
#include <sys/malloc.h>
#include <sys/ioctl.h>
#include <sys/disklabel.h>
#include <sys/device.h>
#include <sys/disk.h>
#include <sys/stat.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/fstrans.h>
#include <sys/file.h>
#include <sys/uio.h>
#include <sys/conf.h>
#include <sys/kauth.h>
#include <sys/module.h>
#include <sys/compat_stub.h>
#include <sys/atomic.h>

#include <uvm/uvm.h>

#include <net/zlib.h>

#include <miscfs/genfs/genfs.h>
#include <miscfs/specfs/specdev.h>

#include <dev/dkvar.h>
#include <dev/vndvar.h>

#include "ioconf.h"

#if defined(VNDDEBUG) && !defined(DEBUG)
#define DEBUG
#endif

#ifdef DEBUG
int dovndcluster = 1;
#define VDB_FOLLOW	0x01
#define VDB_INIT	0x02
#define VDB_IO		0x04
#define VDB_LABEL	0x08
int vnddebug = 0;
#endif

#define vndunit(x)	DISKUNIT(x)

struct vndxfer {
	struct buf vx_buf;
	struct vnd_softc *vx_vnd;
};
#define	VND_BUFTOXFER(bp)	((struct vndxfer *)(void *)bp)

#define	VND_GETXFER(vnd)	pool_get(&(vnd)->sc_vxpool, PR_WAITOK)
#define	VND_PUTXFER(vnd, vx)	pool_put(&(vnd)->sc_vxpool, (vx))

#define VNDLABELDEV(dev) \
    (MAKEDISKDEV(major((dev)), vndunit((dev)), RAW_PART))

#define	VND_MAXPENDING(vnd)	((vnd)->sc_maxactive * 4)
#define	VND_MAXPAGES(vnd)	(1024 * 1024 / PAGE_SIZE)


static void	vndclear(struct vnd_softc *, int);
static int	vnddoclear(struct vnd_softc *, int, int, bool);
static int	vndsetcred(struct vnd_softc *, kauth_cred_t);
static void	vndthrottle(struct vnd_softc *, struct vnode *);
static void	vndiodone(struct buf *);
#if 0
static void	vndshutdown(void);
#endif

static void	vndgetdefaultlabel(struct vnd_softc *, struct disklabel *);
static void	vndgetdisklabel(dev_t, struct vnd_softc *);

static int	vndlock(struct vnd_softc *);
static void	vndunlock(struct vnd_softc *);
#ifdef VND_COMPRESSION
static void	compstrategy(struct buf *, off_t);
static void	*vnd_alloc(void *, u_int, u_int);
static void	vnd_free(void *, void *);
#endif /* VND_COMPRESSION */

static void	vndthread(void *);
static bool	vnode_has_op(const struct vnode *, int);
static void	handle_with_rdwr(struct vnd_softc *, const struct buf *,
		    struct buf *);
static void	handle_with_strategy(struct vnd_softc *, const struct buf *,
		    struct buf *);
static void	vnd_set_geometry(struct vnd_softc *);

static dev_type_open(vndopen);
static dev_type_close(vndclose);
static dev_type_read(vndread);
static dev_type_write(vndwrite);
static dev_type_ioctl(vndioctl);
static dev_type_strategy(vndstrategy);
static dev_type_dump(vnddump);
static dev_type_size(vndsize);

const struct bdevsw vnd_bdevsw = {
	.d_open = vndopen,
	.d_close = vndclose,
	.d_strategy = vndstrategy,
	.d_ioctl = vndioctl,
	.d_dump = vnddump,
	.d_psize = vndsize,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};

const struct cdevsw vnd_cdevsw = {
	.d_open = vndopen,
	.d_close = vndclose,
	.d_read = vndread,
	.d_write = vndwrite,
	.d_ioctl = vndioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};

static int	vnd_match(device_t, cfdata_t, void *);
static void	vnd_attach(device_t, device_t, void *);
static int	vnd_detach(device_t, int);

CFATTACH_DECL3_NEW(vnd, sizeof(struct vnd_softc),
    vnd_match, vnd_attach, vnd_detach, NULL, NULL, NULL, DVF_DETACH_SHUTDOWN);

static struct vnd_softc	*vnd_spawn(int);
static int	vnd_destroy(device_t);

static const struct dkdriver vnddkdriver = {
	.d_strategy = vndstrategy,
	.d_minphys = minphys
};

void
vndattach(int num)
{
	int error;

	error = config_cfattach_attach(vnd_cd.cd_name, &vnd_ca);
	if (error)
		aprint_error("%s: unable to register cfattach, error = %d\n",
		    vnd_cd.cd_name, error);
}

static int
vnd_match(device_t self, cfdata_t cfdata, void *aux)
{

	return 1;
}

static void
vnd_attach(device_t parent, device_t self, void *aux)
{
	struct vnd_softc *sc = device_private(self);

	sc->sc_dev = self;
	sc->sc_comp_offsets = NULL;
	sc->sc_comp_buff = NULL;
	sc->sc_comp_decombuf = NULL;
	bufq_alloc(&sc->sc_tab, "disksort", BUFQ_SORT_RAWBLOCK);
	disk_init(&sc->sc_dkdev, device_xname(self), &vnddkdriver);
	if (!pmf_device_register(self, NULL, NULL))
		aprint_error_dev(self, "couldn't establish power handler\n");
}

static int
vnd_detach(device_t self, int flags)
{
	int error;
	struct vnd_softc *sc = device_private(self);

	if (sc->sc_flags & VNF_INITED) {
		error = vnddoclear(sc, 0, -1, (flags & DETACH_FORCE) != 0);
		if (error != 0)
			return error;
	}

	pmf_device_deregister(self);
	bufq_free(sc->sc_tab);
	disk_destroy(&sc->sc_dkdev);

	return 0;
}

static struct vnd_softc *
vnd_spawn(int unit)
{
	cfdata_t cf;

	cf = malloc(sizeof(*cf), M_DEVBUF, M_WAITOK);
	cf->cf_name = vnd_cd.cd_name;
	cf->cf_atname = vnd_cd.cd_name;
	cf->cf_unit = unit;
	cf->cf_fstate = FSTATE_STAR;

	return device_private(config_attach_pseudo(cf));
}

static int
vnd_destroy(device_t dev)
{
	int error;
	cfdata_t cf;

	cf = device_cfdata(dev);
	error = config_detach(dev, DETACH_QUIET);
	if (error)
		return error;
	free(cf, M_DEVBUF);
	return 0;
}

static int
vndopen(dev_t dev, int flags, int mode, struct lwp *l)
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;
	int error = 0, part, pmask;
	struct disklabel *lp;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndopen(0x%"PRIx64", 0x%x, 0x%x, %p)\n", dev, flags, mode, l);
#endif
	sc = device_lookup_private(&vnd_cd, unit);
	if (sc == NULL) {
		sc = vnd_spawn(unit);
		if (sc == NULL)
			return ENOMEM;

		/* compatibility, keep disklabel after close */
		sc->sc_flags = VNF_KLABEL;
	}

	if ((error = vndlock(sc)) != 0)
		return error;

	mutex_enter(&sc->sc_dkdev.dk_openlock);

	if ((sc->sc_flags & VNF_CLEARING) != 0) {
		error = ENXIO;
		goto done;
	}

	lp = sc->sc_dkdev.dk_label;

	part = DISKPART(dev);
	pmask = (1 << part);

	if (sc->sc_dkdev.dk_nwedges != 0 && part != RAW_PART) {
		error = EBUSY;
		goto done;
	}

	if (sc->sc_flags & VNF_INITED) {
		if ((sc->sc_dkdev.dk_openmask & ~(1<<RAW_PART)) != 0) {
			/*
			 * If any non-raw partition is open, but the disk
			 * has been invalidated, disallow further opens.
			 */
			if ((sc->sc_flags & VNF_VLABEL) == 0) {
				error = EIO;
				goto done;
			}
		} else {
			/*
			 * Load the partition info if not already loaded.
			 */
			if ((sc->sc_flags & VNF_VLABEL) == 0) {
				sc->sc_flags |= VNF_VLABEL;
				vndgetdisklabel(dev, sc);
			}
		}
	}

	/* Check that the partition exists. */
	if (part != RAW_PART) {
		if (((sc->sc_flags & VNF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
		     (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			goto done;
		}
	}

	/* Prevent our unit from being unconfigured while open. */
	switch (mode) {
	case S_IFCHR:
		sc->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		sc->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}
	sc->sc_dkdev.dk_openmask =
	    sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask;

 done:
	mutex_exit(&sc->sc_dkdev.dk_openlock);
	vndunlock(sc);
	return error;
}

static int
vndclose(dev_t dev, int flags, int mode, struct lwp *l)
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;
	int error = 0, part;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndclose(0x%"PRIx64", 0x%x, 0x%x, %p)\n", dev, flags, mode, l);
#endif
	sc = device_lookup_private(&vnd_cd, unit);
	if (sc == NULL)
		return ENXIO;

	if ((error = vndlock(sc)) != 0)
		return error;

	mutex_enter(&sc->sc_dkdev.dk_openlock);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (mode) {
	case S_IFCHR:
		sc->sc_dkdev.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		sc->sc_dkdev.dk_bopenmask &= ~(1 << part);
		break;
	}
	sc->sc_dkdev.dk_openmask =
	    sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask;

	/* Are we the last opener? */
	if (sc->sc_dkdev.dk_openmask == 0) {
		if ((sc->sc_flags & VNF_KLABEL) == 0)
			sc->sc_flags &= ~VNF_VLABEL;
	}

	mutex_exit(&sc->sc_dkdev.dk_openlock);

	vndunlock(sc);

	if ((sc->sc_flags & VNF_INITED) == 0) {
		if ((error = vnd_destroy(sc->sc_dev)) != 0) {
			aprint_error_dev(sc->sc_dev,
			    "unable to detach instance\n");
			return error;
		}
	}

	return 0;
}

/*
 * Queue the request, and wakeup the kernel thread to handle it.
 */
static void
vndstrategy(struct buf *bp)
{
	int unit = vndunit(bp->b_dev);
	struct vnd_softc *vnd =
	    device_lookup_private(&vnd_cd, unit);
	struct disklabel *lp;
	daddr_t blkno;
	int s = splbio();

	if (vnd == NULL) {
		bp->b_error = ENXIO;
		goto done;
	}
	lp = vnd->sc_dkdev.dk_label;

	if ((vnd->sc_flags & VNF_INITED) == 0) {
		bp->b_error = ENXIO;
		goto done;
	}

	/*
	 * The transfer must be a whole number of blocks.
	 */
	if ((bp->b_bcount % lp->d_secsize) != 0) {
		bp->b_error = EINVAL;
		goto done;
	}

	/*
	 * Check if we're read-only.
	 */
	if ((vnd->sc_flags & VNF_READONLY) && !(bp->b_flags & B_READ)) {
		bp->b_error = EACCES;
		goto done;
	}

	/* If it's a nil transfer, wake up the top half now. */
	if (bp->b_bcount == 0) {
		goto done;
	}

	/*
	 * Do bounds checking and adjust the transfer.  If there's an error,
	 * the bounds check will flag that for us.
	 */
	if (DISKPART(bp->b_dev) == RAW_PART) {
		if (bounds_check_with_mediasize(bp, DEV_BSIZE,
		    vnd->sc_size) <= 0)
			goto done;
	} else {
		if (bounds_check_with_label(&vnd->sc_dkdev,
		    bp, vnd->sc_flags & (VNF_WLABEL|VNF_LABELLING)) <= 0)
			goto done;
	}

	/*
	 * Put the block number in terms of the logical blocksize
	 * of the "device".
	 */

	blkno = bp->b_blkno / (lp->d_secsize / DEV_BSIZE);

	/*
	 * Translate the partition-relative block number to an absolute one.
	 */
	if (DISKPART(bp->b_dev) != RAW_PART) {
		struct partition *pp;

		pp = &vnd->sc_dkdev.dk_label->d_partitions[
		    DISKPART(bp->b_dev)];
		blkno += pp->p_offset;
	}
	bp->b_rawblkno = blkno;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndstrategy(%p): unit %d\n", bp, unit);
#endif
	if ((vnd->sc_flags & VNF_USE_VN_RDWR)) {
		/*
		 * Limit the number of pending requests to not exhaust
		 * resources needed for I/O but always allow the worker
		 * thread to add requests, as a wedge on vnd queues
		 * requests with biodone() -> dkstart() -> vndstrategy().
		 */
		if (curlwp != vnd->sc_kthread && curlwp != uvm.pagedaemon_lwp) {
			while (vnd->sc_pending >= VND_MAXPENDING(vnd))
				tsleep(&vnd->sc_pending, PRIBIO, "vndpc", 0);
		}
		vnd->sc_pending++;
		KASSERT(vnd->sc_pending > 0);
	}
	bufq_put(vnd->sc_tab, bp);
	wakeup(&vnd->sc_tab);
	splx(s);
	return;

 done:
	bp->b_resid = bp->b_bcount;
	biodone(bp);
	splx(s);
}
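
/*
 * Worked example (illustrative numbers): with d_secsize = 2048, each
 * logical block covers 2048 / DEV_BSIZE = 4 of the 512-byte units in
 * which b_blkno is expressed.  A request at b_blkno 24 on a partition
 * whose p_offset is 100 sectors therefore gets
 * b_rawblkno = 24 / 4 + 100 = 106.
 */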

static bool
vnode_has_strategy(struct vnd_softc *vnd)
{
	return vnode_has_op(vnd->sc_vp, VOFFSET(vop_bmap)) &&
	    vnode_has_op(vnd->sc_vp, VOFFSET(vop_strategy));
}

/* Verify that I/O requests cannot be smaller than the
 * smallest I/O size supported by the backend.
 */
static bool
vnode_has_large_blocks(struct vnd_softc *vnd)
{
	u_int32_t vnd_secsize, iosize;

	iosize = vnd->sc_iosize;
	vnd_secsize = vnd->sc_geom.vng_secsize;

	return vnd_secsize % iosize != 0;
}

/* XXX this function needs a reliable check to detect
 * sparse files.  Otherwise, bmap/strategy may be used
 * and fail on non-allocated blocks.  VOP_READ/VOP_WRITE
 * works on sparse files.
 */
#if notyet
static bool
vnode_strategy_probe(struct vnd_softc *vnd)
{
	int error;
	daddr_t nbn;

	if (!vnode_has_strategy(vnd))
		return false;

	if (vnode_has_large_blocks(vnd))
		return false;

	/* Convert the first logical block number to its
	 * physical block number.
	 */
	error = 0;
	vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY);
	error = VOP_BMAP(vnd->sc_vp, 0, NULL, &nbn, NULL);
	VOP_UNLOCK(vnd->sc_vp);

	/* Test if that worked. */
	if (error == 0 && (long)nbn == -1)
		return false;

	return true;
}
#endif

static void
vndthread(void *arg)
{
	struct vnd_softc *vnd = arg;
	int s;

	/* Determine whether we can *use* VOP_BMAP and VOP_STRATEGY to
	 * directly access the backing vnode.  If we can, use these two
	 * operations to avoid messing with the local buffer cache.
	 * Otherwise fall back to regular VOP_READ/VOP_WRITE operations
	 * which are guaranteed to work with any file system. */
	if ((vnd->sc_flags & VNF_USE_VN_RDWR) == 0 &&
	    ! vnode_has_strategy(vnd))
		vnd->sc_flags |= VNF_USE_VN_RDWR;

	/* VOP_STRATEGY can only be used if the backing vnode allows
	 * access to blocks as small as defined by the vnd geometry.
	 */
	if ((vnd->sc_flags & VNF_USE_VN_RDWR) == 0 &&
	    vnode_has_large_blocks(vnd))
		vnd->sc_flags |= VNF_USE_VN_RDWR;

#ifdef DEBUG
	if (vnddebug & VDB_INIT)
		printf("vndthread: vp %p, %s\n", vnd->sc_vp,
		    (vnd->sc_flags & VNF_USE_VN_RDWR) == 0 ?
		    "using bmap/strategy operations" :
		    "using read/write operations");
#endif

	s = splbio();
	vnd->sc_flags |= VNF_KTHREAD;
	wakeup(&vnd->sc_kthread);

	/*
	 * Dequeue requests and serve them depending on the available
	 * vnode operations.
	 */
	while ((vnd->sc_flags & VNF_VUNCONF) == 0) {
		struct vndxfer *vnx;
		struct buf *obp;
		struct buf *bp;

		obp = bufq_get(vnd->sc_tab);
		if (obp == NULL) {
			tsleep(&vnd->sc_tab, PRIBIO, "vndbp", 0);
			continue;
		}
		if ((vnd->sc_flags & VNF_USE_VN_RDWR)) {
			KASSERT(vnd->sc_pending > 0);
			if (vnd->sc_pending-- == VND_MAXPENDING(vnd))
				wakeup(&vnd->sc_pending);
		}
		splx(s);
#ifdef DEBUG
		if (vnddebug & VDB_FOLLOW)
			printf("vndthread(%p)\n", obp);
#endif

		if (vnd->sc_vp->v_mount == NULL) {
			obp->b_error = ENXIO;
			goto done;
		}
#ifdef VND_COMPRESSION
		/* handle a compressed read */
		if ((obp->b_flags & B_READ) != 0 && (vnd->sc_flags & VNF_COMP)) {
			off_t bn;

			/* Convert to a byte offset within the file. */
			bn = obp->b_rawblkno *
			    vnd->sc_dkdev.dk_label->d_secsize;

			compstrategy(obp, bn);
			goto done;
		}
#endif /* VND_COMPRESSION */

		/*
		 * Allocate a header for this transfer and link it to the
		 * buffer
		 */
		s = splbio();
		vnx = VND_GETXFER(vnd);
		splx(s);
		vnx->vx_vnd = vnd;

		s = splbio();
		while (vnd->sc_active >= vnd->sc_maxactive) {
			tsleep(&vnd->sc_tab, PRIBIO, "vndac", 0);
		}
		vnd->sc_active++;
		splx(s);

		/* Instrumentation. */
		disk_busy(&vnd->sc_dkdev);

		bp = &vnx->vx_buf;
		buf_init(bp);
		bp->b_flags = (obp->b_flags & (B_READ | B_PHYS | B_RAW));
		bp->b_oflags = obp->b_oflags;
		bp->b_cflags = obp->b_cflags;
		bp->b_iodone = vndiodone;
		bp->b_private = obp;
		bp->b_vp = vnd->sc_vp;
		bp->b_objlock = bp->b_vp->v_interlock;
		bp->b_data = obp->b_data;
		bp->b_bcount = obp->b_bcount;
		BIO_COPYPRIO(bp, obp);

		/* Make sure the request succeeds while suspending this fs. */
		fstrans_start_lazy(vnd->sc_vp->v_mount);

		/* Handle the request using the appropriate operations. */
		if ((vnd->sc_flags & VNF_USE_VN_RDWR) == 0)
			handle_with_strategy(vnd, obp, bp);
		else
			handle_with_rdwr(vnd, obp, bp);

		fstrans_done(vnd->sc_vp->v_mount);

		s = splbio();
		continue;

 done:
		biodone(obp);
		s = splbio();
	}

	vnd->sc_flags &= (~VNF_KTHREAD | VNF_VUNCONF);
	wakeup(&vnd->sc_kthread);
	splx(s);
	kthread_exit(0);
}

/*
 * Checks if the given vnode supports the requested operation.
 * The operation is specified by the offset returned by VOFFSET.
 *
 * XXX The test below used to determine this is quite fragile
 * because it relies on the file system to use genfs to specify
 * unimplemented operations.  There might be another way to do
 * it more cleanly.
 */
static bool
vnode_has_op(const struct vnode *vp, int opoffset)
{
	int (*defaultp)(void *);
	int (*opp)(void *);

	defaultp = vp->v_op[VOFFSET(vop_default)];
	opp = vp->v_op[opoffset];

	return opp != defaultp && opp != genfs_eopnotsupp &&
	    opp != genfs_badop && opp != genfs_nullop;
}
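
/*
 * Example (illustrative): a file system that leaves vop_bmap at its
 * vop_default entry, or points it at genfs_eopnotsupp or genfs_badop,
 * makes vnode_has_op(vp, VOFFSET(vop_bmap)) return false, which in
 * turn steers vndthread() to the VOP_READ/VOP_WRITE path above.
 */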

/*
 * Handles the read/write request given in 'bp' using the vnode's VOP_READ
 * and VOP_WRITE operations.
 *
 * 'obp' is a pointer to the original request fed to the vnd device.
 */
static void
handle_with_rdwr(struct vnd_softc *vnd, const struct buf *obp, struct buf *bp)
{
	bool doread;
	off_t offset;
	size_t len, resid;
	struct vnode *vp;
	int npages;

	doread = bp->b_flags & B_READ;
	offset = obp->b_rawblkno * vnd->sc_dkdev.dk_label->d_secsize;
	len = bp->b_bcount;
	vp = vnd->sc_vp;

#if defined(DEBUG)
	if (vnddebug & VDB_IO)
		printf("vnd (rdwr): vp %p, %s, rawblkno 0x%" PRIx64
		    ", secsize %d, offset %" PRIu64
		    ", bcount %d\n",
		    vp, doread ? "read" : "write", obp->b_rawblkno,
		    vnd->sc_dkdev.dk_label->d_secsize, offset,
		    bp->b_bcount);
#endif

	/* Issue the read or write operation. */
	bp->b_error =
	    vn_rdwr(doread ? UIO_READ : UIO_WRITE,
	    vp, bp->b_data, len, offset, UIO_SYSSPACE,
	    IO_ADV_ENCODE(POSIX_FADV_NOREUSE) | IO_DIRECT,
	    vnd->sc_cred, &resid, NULL);
	bp->b_resid = resid;

	/*
	 * Avoid caching too many pages, the vnd user
	 * is usually a filesystem and caches itself.
	 * We need some amount of caching to not hinder
	 * read-ahead and write-behind operations.
	 */
	npages = atomic_load_relaxed(&vp->v_uobj.uo_npages);
	if (npages > VND_MAXPAGES(vnd)) {
		rw_enter(vp->v_uobj.vmobjlock, RW_WRITER);
		(void) VOP_PUTPAGES(vp, 0, 0,
		    PGO_ALLPAGES | PGO_CLEANIT | PGO_FREE);
	}

	/* We need to increase the number of outputs on the vnode if
	 * there was any write to it. */
	if (!doread) {
		mutex_enter(vp->v_interlock);
		vp->v_numoutput++;
		mutex_exit(vp->v_interlock);
	}

	biodone(bp);
}

/*
 * Handles the read/write request given in 'bp' using the vnode's VOP_BMAP
 * and VOP_STRATEGY operations.
 *
 * 'obp' is a pointer to the original request fed to the vnd device.
 */
static void
handle_with_strategy(struct vnd_softc *vnd, const struct buf *obp,
    struct buf *bp)
{
	int bsize, error, flags, skipped;
	size_t resid, sz;
	off_t bn, offset;
	struct vnode *vp;
	struct buf *nbp = NULL;

	flags = obp->b_flags;

	/* Convert to a byte offset within the file. */
	bn = obp->b_rawblkno * vnd->sc_dkdev.dk_label->d_secsize;

	bsize = vnd->sc_vp->v_mount->mnt_stat.f_iosize;
	/* Use the default if the filesystem didn't specify a block size. */
	if (bsize <= 0)
		bsize = BLKDEV_IOSIZE;
	skipped = 0;

	/*
	 * Break the request into bsize pieces and feed them
	 * sequentially using VOP_BMAP/VOP_STRATEGY.
	 * We do it this way to keep from flooding NFS servers if we
	 * are connected to an NFS file.  This places the burden on
	 * the client rather than the server.
	 */
	error = 0;
	bp->b_resid = bp->b_bcount;
	for (offset = 0, resid = bp->b_resid; /* true */;
	    resid -= sz, offset += sz) {
		daddr_t nbn;
		int off, nra;

		nra = 0;
		vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY);
		error = VOP_BMAP(vnd->sc_vp, bn / bsize, &vp, &nbn, &nra);
		VOP_UNLOCK(vnd->sc_vp);

		if (error == 0 && (long)nbn == -1)
			error = EIO;

		/*
		 * If there was an error or a hole in the file...punt.
		 * Note that we may have to wait for any operations
		 * that we have already fired off before releasing
		 * the buffer.
		 *
		 * XXX we could deal with holes here but it would be
		 * a hassle (in the write case).
		 */
		if (error) {
			skipped += resid;
			break;
		}

#ifdef DEBUG
		if (!dovndcluster)
			nra = 0;
#endif

		off = bn % bsize;
		sz = MIN(((off_t)1 + nra) * bsize - off, resid);
#ifdef DEBUG
		if (vnddebug & VDB_IO)
			printf("vndstrategy: vp %p/%p bn 0x%qx/0x%" PRIx64
			    " sz 0x%zx\n", vnd->sc_vp, vp, (long long)bn,
			    nbn, sz);
#endif

		nbp = getiobuf(vp, true);
		nestiobuf_setup(bp, nbp, offset, sz);
		nbp->b_blkno = nbn + btodb(off);

#if 0 /* XXX #ifdef DEBUG */
		if (vnddebug & VDB_IO)
			printf("vndstart(%ld): bp %p vp %p blkno "
			    "0x%" PRIx64 " flags %x addr %p cnt 0x%x\n",
			    (long) (vnd-vnd_softc), &nbp->vb_buf,
			    nbp->vb_buf.b_vp, nbp->vb_buf.b_blkno,
			    nbp->vb_buf.b_flags, nbp->vb_buf.b_data,
			    nbp->vb_buf.b_bcount);
#endif
		if (resid == sz) {
			break;
		}
		VOP_STRATEGY(vp, nbp);
		bn += sz;
	}
	if (!(flags & B_READ)) {
		struct vnode *w_vp;
		/*
		 * This is the last nested buf, account for
		 * the parent buf write too.
		 * This has to be done last, so that
		 * fsync won't wait for this write which
		 * has no chance to complete before all nested bufs
		 * have been queued.  But it has to be done
		 * before the last VOP_STRATEGY()
		 * or the call to nestiobuf_done().
		 */
		w_vp = bp->b_vp;
		mutex_enter(w_vp->v_interlock);
		w_vp->v_numoutput++;
		mutex_exit(w_vp->v_interlock);
	}
	KASSERT(skipped != 0 || nbp != NULL);
	if (skipped)
		nestiobuf_done(bp, skipped, error);
	else
		VOP_STRATEGY(vp, nbp);
}
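
/*
 * Worked example (illustrative numbers): with bsize = 0x2000 (8 KiB),
 * a 0x6000-byte request starting at byte offset bn = 0x1000 and no
 * read-ahead (nra = 0) is split as follows:
 *
 *	pass 1: off = 0x1000, sz = MIN(0x2000 - 0x1000, 0x6000) = 0x1000
 *	pass 2: off = 0,      sz = MIN(0x2000, 0x5000) = 0x2000
 *	pass 3: off = 0,      sz = MIN(0x2000, 0x3000) = 0x2000
 *	pass 4: off = 0,      sz = 0x1000 == resid, so it is the final
 *	        nested buffer and the loop breaks.
 */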

static void
vndiodone(struct buf *bp)
{
	struct vndxfer *vnx = VND_BUFTOXFER(bp);
	struct vnd_softc *vnd = vnx->vx_vnd;
	struct buf *obp = bp->b_private;
	int s = splbio();

	KERNEL_LOCK(1, NULL);		/* XXXSMP */
	KASSERT(&vnx->vx_buf == bp);
	KASSERT(vnd->sc_active > 0);
#ifdef DEBUG
	if (vnddebug & VDB_IO) {
		printf("vndiodone1: bp %p iodone: error %d\n",
		    bp, bp->b_error);
	}
#endif
	disk_unbusy(&vnd->sc_dkdev, bp->b_bcount - bp->b_resid,
	    (bp->b_flags & B_READ));
	vnd->sc_active--;
	if (vnd->sc_active == 0) {
		wakeup(&vnd->sc_tab);
	}
	KERNEL_UNLOCK_ONE(NULL);	/* XXXSMP */
	splx(s);
	obp->b_error = bp->b_error;
	obp->b_resid = bp->b_resid;
	buf_destroy(bp);
	VND_PUTXFER(vnd, vnx);
	biodone(obp);
}

/* ARGSUSED */
static int
vndread(dev_t dev, struct uio *uio, int flags)
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndread(0x%"PRIx64", %p)\n", dev, uio);
#endif

	sc = device_lookup_private(&vnd_cd, unit);
	if (sc == NULL)
		return ENXIO;

	if ((sc->sc_flags & VNF_INITED) == 0)
		return ENXIO;

	return physio(vndstrategy, NULL, dev, B_READ, minphys, uio);
}

/* ARGSUSED */
static int
vndwrite(dev_t dev, struct uio *uio, int flags)
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndwrite(0x%"PRIx64", %p)\n", dev, uio);
#endif

	sc = device_lookup_private(&vnd_cd, unit);
	if (sc == NULL)
		return ENXIO;

	if ((sc->sc_flags & VNF_INITED) == 0)
		return ENXIO;

	return physio(vndstrategy, NULL, dev, B_WRITE, minphys, uio);
}

static int
vnd_cget(struct lwp *l, int unit, int *un, struct vattr *va)
{
	int error;
	struct vnd_softc *vnd;

	if (*un == -1)
		*un = unit;
	if (*un < 0)
		return EINVAL;

	vnd = device_lookup_private(&vnd_cd, *un);
	if (vnd == NULL)
		return -1;

	if ((vnd->sc_flags & VNF_INITED) == 0)
		return -1;

	vn_lock(vnd->sc_vp, LK_SHARED | LK_RETRY);
	error = VOP_GETATTR(vnd->sc_vp, va, l->l_cred);
	VOP_UNLOCK(vnd->sc_vp);
	return error;
}
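
/*
 * Note (added for clarity): vnd_cget() returns -1, not an errno, for a
 * unit that exists but is unconfigured; vndioctl_get() below maps that
 * case to a zeroed vattr so VNDIOCGET can report an unused unit rather
 * than fail.
 */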

static int
vnddoclear(struct vnd_softc *vnd, int pmask, int minor, bool force)
{
	int error;

	if ((error = vndlock(vnd)) != 0)
		return error;

	/*
	 * Don't unconfigure if any other partitions are open
	 * or if both the character and block flavors of this
	 * partition are open.
	 */
	if (DK_BUSY(vnd, pmask) && !force) {
		vndunlock(vnd);
		return EBUSY;
	}

	/* Delete all of our wedges */
	dkwedge_delall(&vnd->sc_dkdev);

	/*
	 * XXX vndclear() might call vndclose() implicitly;
	 * release lock to avoid recursion
	 *
	 * Set VNF_CLEARING to prevent vndopen() from
	 * sneaking in after we vndunlock().
	 */
	vnd->sc_flags |= VNF_CLEARING;
	vndunlock(vnd);
	vndclear(vnd, minor);
#ifdef DEBUG
	if (vnddebug & VDB_INIT)
		printf("%s: CLRed\n", __func__);
#endif

	/* Destroy the xfer and buffer pools. */
	pool_destroy(&vnd->sc_vxpool);

	/* Detach the disk. */
	disk_detach(&vnd->sc_dkdev);

	return 0;
}

static int
vndioctl_get(struct lwp *l, void *data, int unit, struct vattr *va)
{
	int error;

	KASSERT(l);

	/* the first member is always int vnd_unit in all the versions */
	if (*(int *)data >= vnd_cd.cd_ndevs)
		return ENXIO;

	switch (error = vnd_cget(l, unit, (int *)data, va)) {
	case -1:
		/* unused is not an error */
		memset(va, 0, sizeof(*va));
		/*FALLTHROUGH*/
	case 0:
		return 0;
	default:
		return error;
	}
}

/* ARGSUSED */
static int
vndioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
{
	bool force;
	int unit = vndunit(dev);
	struct vnd_softc *vnd;
	struct vnd_ioctl *vio;
	struct vattr vattr;
	struct pathbuf *pb;
	struct vnode *vp;
	int error, part, pmask;
	uint64_t geomsize;
	int fflags;
#ifdef __HAVE_OLD_DISKLABEL
	struct disklabel newlabel;
#endif

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndioctl(0x%"PRIx64", 0x%lx, %p, 0x%x, %p): unit %d\n",
		    dev, cmd, data, flag, l->l_proc, unit);
#endif
	/* Do the gets first; they don't need initialization or verification */
	switch (cmd) {
	case VNDIOCGET:
		if ((error = vndioctl_get(l, data, unit, &vattr)) != 0)
			return error;

		struct vnd_user *vnu = data;
		vnu->vnu_dev = vattr.va_fsid;
		vnu->vnu_ino = vattr.va_fileid;
		return 0;

	default:
		/* First check for COMPAT_50 hook */
		MODULE_HOOK_CALL(compat_vndioctl_50_hook,
		    (cmd, l, data, unit, &vattr, vndioctl_get),
		    enosys(), error);

		/*
		 * If not present, then COMPAT_30 hook also not
		 * present, so just continue with checks for the
		 * "write" commands
		 */
		if (error == ENOSYS) {
			error = 0;
			break;
		}

		/* If not already handled, try the COMPAT_30 hook */
		if (error == EPASSTHROUGH)
			MODULE_HOOK_CALL(compat_vndioctl_30_hook,
			    (cmd, l, data, unit, &vattr, vndioctl_get),
			    enosys(), error);

		/* If no COMPAT_30 module, or not handled, check writes */
		if (error == ENOSYS || error == EPASSTHROUGH) {
			error = 0;
			break;
		}
		return error;
	}

	vnd = device_lookup_private(&vnd_cd, unit);
	if (vnd == NULL)
		return ENXIO;
	vio = (struct vnd_ioctl *)data;

	/* Must be open for writes for these commands... */
	switch (cmd) {
	case VNDIOCSET50:
	case VNDIOCCLR50:
		if (!compat_vndioctl_50_hook.hooked)
			return EINVAL;
		/* FALLTHROUGH */
	case VNDIOCSET:
	case VNDIOCCLR:
	case DIOCSDINFO:
	case DIOCWDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCSDINFO:
	case ODIOCWDINFO:
#endif
	case DIOCKLABEL:
	case DIOCWLABEL:
	case DIOCCACHESYNC:
		if ((flag & FWRITE) == 0)
			return EBADF;
	}

	switch (cmd) {
	case VNDIOCSET50:
	case VNDIOCSET:
		/* Must not be initialized */
		if (vnd->sc_flags & VNF_INITED)
			return EBUSY;
		break;
	default:
		/* Must be initialized */
		if ((vnd->sc_flags & VNF_INITED) == 0)
			return ENXIO;
		break;
	}

	error = disk_ioctl(&vnd->sc_dkdev, dev, cmd, data, flag, l);
	if (error != EPASSTHROUGH)
		return error;

	switch (cmd) {
	case VNDIOCSET50:
	case VNDIOCSET:
		if ((error = vndlock(vnd)) != 0)
			return error;

		fflags = FREAD;
		if ((vio->vnd_flags & VNDIOF_READONLY) == 0)
			fflags |= FWRITE;
		if ((vio->vnd_flags & VNDIOF_FILEIO) != 0)
			vnd->sc_flags |= VNF_USE_VN_RDWR;
		error = pathbuf_copyin(vio->vnd_file, &pb);
		if (error) {
			goto unlock_and_exit;
		}
		error = vn_open(NULL, pb, 0, fflags, 0, &vp, NULL, NULL);
		if (error != 0) {
			pathbuf_destroy(pb);
			goto unlock_and_exit;
		}
		KASSERT(l);
		error = VOP_GETATTR(vp, &vattr, l->l_cred);
		if (!error && vp->v_type != VREG)
			error = EOPNOTSUPP;
		if (!error && vattr.va_bytes < vattr.va_size)
			/* File is definitely sparse, use vn_rdwr() */
			vnd->sc_flags |= VNF_USE_VN_RDWR;
		if (error) {
			VOP_UNLOCK(vp);
			goto close_and_exit;
		}

		/* If using a compressed file, initialize its info */
		/* (or abort with an error if kernel has no compression) */
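		/*
		 * On-disk layout expected here (a sketch inferred from
		 * the parsing below, cloop-style): a vnd_comp_header
		 * carrying the uncompressed block size and the block
		 * count as big-endian 32-bit values, followed by
		 * num_blocks + 1 big-endian 64-bit file offsets (the
		 * last one being the file size), followed by the
		 * zlib-compressed blocks at those offsets.
		 */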
		if (vio->vnd_flags & VNDIOF_COMP) {
#ifdef VND_COMPRESSION
			struct vnd_comp_header *ch;
			int i;
			uint32_t comp_size;
			uint32_t comp_maxsize;

			/* allocate space for compressed file header */
			ch = malloc(sizeof(struct vnd_comp_header),
			    M_TEMP, M_WAITOK);

			/* read compressed file header */
			error = vn_rdwr(UIO_READ, vp, (void *)ch,
			    sizeof(struct vnd_comp_header), 0, UIO_SYSSPACE,
			    IO_UNIT|IO_NODELOCKED, l->l_cred, NULL, NULL);
			if (error) {
				free(ch, M_TEMP);
				VOP_UNLOCK(vp);
				goto close_and_exit;
			}

			if (be32toh(ch->block_size) == 0 ||
			    be32toh(ch->num_blocks) > UINT32_MAX - 1) {
				free(ch, M_TEMP);
				VOP_UNLOCK(vp);
				goto close_and_exit;
			}

			/* save some header info */
			vnd->sc_comp_blksz = be32toh(ch->block_size);
			/* note last offset is the file byte size */
			vnd->sc_comp_numoffs = be32toh(ch->num_blocks) + 1;
			free(ch, M_TEMP);
			if (!DK_DEV_BSIZE_OK(vnd->sc_comp_blksz)) {
				VOP_UNLOCK(vp);
				error = EINVAL;
				goto close_and_exit;
			}
			KASSERT(0 < vnd->sc_comp_blksz);
			KASSERT(0 < vnd->sc_comp_numoffs);
			/*
			 * @#^@!$& gcc -Wtype-limits refuses to let me
			 * write SIZE_MAX/sizeof(uint64_t) < numoffs,
			 * because the range of the type on amd64 makes
			 * the comparisons always false.
			 */
#if SIZE_MAX <= UINT32_MAX*(64/CHAR_BIT)
			if (SIZE_MAX/sizeof(uint64_t) < vnd->sc_comp_numoffs) {
				VOP_UNLOCK(vp);
				error = EINVAL;
				goto close_and_exit;
			}
#endif
			if ((vattr.va_size < sizeof(struct vnd_comp_header)) ||
			    (vattr.va_size - sizeof(struct vnd_comp_header) <
			     sizeof(uint64_t)*vnd->sc_comp_numoffs) ||
			    (UQUAD_MAX/vnd->sc_comp_blksz <
			     vnd->sc_comp_numoffs - 1)) {
				VOP_UNLOCK(vp);
				error = EINVAL;
				goto close_and_exit;
			}

			/* set decompressed file size */
			KASSERT(vnd->sc_comp_numoffs - 1 <=
			    UQUAD_MAX/vnd->sc_comp_blksz);
			vattr.va_size =
			    ((u_quad_t)vnd->sc_comp_numoffs - 1) *
			    (u_quad_t)vnd->sc_comp_blksz;

			/* allocate space for all the compressed offsets */
			__CTASSERT(UINT32_MAX <= UQUAD_MAX/sizeof(uint64_t));
			vnd->sc_comp_offsets =
			    malloc(sizeof(uint64_t) * vnd->sc_comp_numoffs,
			    M_DEVBUF, M_WAITOK);

			/* read in the offsets */
			error = vn_rdwr(UIO_READ, vp,
			    (void *)vnd->sc_comp_offsets,
			    sizeof(uint64_t) * vnd->sc_comp_numoffs,
			    sizeof(struct vnd_comp_header), UIO_SYSSPACE,
			    IO_UNIT|IO_NODELOCKED, l->l_cred, NULL, NULL);
			if (error) {
				VOP_UNLOCK(vp);
				goto close_and_exit;
			}
			/*
			 * Find the largest block size (used for the
			 * allocation limit).  Also convert the offsets
			 * to native byte order.
			 */
			comp_maxsize = 0;
			for (i = 0; i < vnd->sc_comp_numoffs - 1; i++) {
				vnd->sc_comp_offsets[i] =
				    be64toh(vnd->sc_comp_offsets[i]);
				comp_size =
				    be64toh(vnd->sc_comp_offsets[i + 1])
				    - vnd->sc_comp_offsets[i];
				if (comp_size > comp_maxsize)
					comp_maxsize = comp_size;
			}
			vnd->sc_comp_offsets[vnd->sc_comp_numoffs - 1] =
			    be64toh(vnd->sc_comp_offsets[vnd->sc_comp_numoffs
			    - 1]);

			/* create compressed data buffer */
			vnd->sc_comp_buff = malloc(comp_maxsize,
			    M_DEVBUF, M_WAITOK);

			/* create decompressed buffer */
			vnd->sc_comp_decombuf = malloc(vnd->sc_comp_blksz,
			    M_DEVBUF, M_WAITOK);
			vnd->sc_comp_buffblk = -1;

			/* Initialize decompress stream */
			memset(&vnd->sc_comp_stream, 0, sizeof(z_stream));
			vnd->sc_comp_stream.zalloc = vnd_alloc;
			vnd->sc_comp_stream.zfree = vnd_free;
			error = inflateInit2(&vnd->sc_comp_stream, MAX_WBITS);
			if (error) {
				if (vnd->sc_comp_stream.msg)
					printf("vnd%d: compressed file, %s\n",
					    unit, vnd->sc_comp_stream.msg);
				VOP_UNLOCK(vp);
				error = EINVAL;
				goto close_and_exit;
			}

			vnd->sc_flags |= VNF_COMP | VNF_READONLY;
#else /* !VND_COMPRESSION */
			VOP_UNLOCK(vp);
			error = EOPNOTSUPP;
			goto close_and_exit;
#endif /* VND_COMPRESSION */
		}

		VOP_UNLOCK(vp);
		vnd->sc_vp = vp;
		vnd->sc_size = btodb(vattr.va_size);	/* note truncation */

		/*
		 * Get the smallest I/O size for the underlying device;
		 * fall back to the fundamental I/O size of the
		 * underlying filesystem.
		 */
		error = bdev_ioctl(vattr.va_fsid, DIOCGSECTORSIZE,
		    &vnd->sc_iosize, FKIOCTL, l);
		if (error)
			vnd->sc_iosize = vnd->sc_vp->v_mount->mnt_stat.f_frsize;

		/* Default I/O size to DEV_BSIZE */
		if (vnd->sc_iosize == 0)
			vnd->sc_iosize = DEV_BSIZE;

		/*
		 * Use the pseudo-geometry specified.  If none was provided,
		 * use the "standard" Adaptec fictitious geometry.
		 */
		if (vio->vnd_flags & VNDIOF_HASGEOM) {

			memcpy(&vnd->sc_geom, &vio->vnd_geom,
			    sizeof(vio->vnd_geom));

			/*
			 * Sanity-check the sector size.
			 */
			if (!DK_DEV_BSIZE_OK(vnd->sc_geom.vng_secsize) ||
			    vnd->sc_geom.vng_ntracks == 0 ||
			    vnd->sc_geom.vng_nsectors == 0) {
				error = EINVAL;
				goto close_and_exit;
			}

			/*
			 * Compute missing cylinder count from size
			 */
			if (vnd->sc_geom.vng_ncylinders == 0)
				vnd->sc_geom.vng_ncylinders = vnd->sc_size / (
				    (vnd->sc_geom.vng_secsize / DEV_BSIZE) *
				    vnd->sc_geom.vng_ntracks *
				    vnd->sc_geom.vng_nsectors);

			/*
			 * Compute the size (in DEV_BSIZE blocks) specified
			 * by the geometry.
			 */
			geomsize = (int64_t)vnd->sc_geom.vng_nsectors *
			    vnd->sc_geom.vng_ntracks *
			    vnd->sc_geom.vng_ncylinders *
			    (vnd->sc_geom.vng_secsize / DEV_BSIZE);

			/*
			 * Sanity-check the size against the specified
			 * geometry.
			 */
			if (vnd->sc_size < geomsize) {
				error = EINVAL;
				goto close_and_exit;
			}
		} else if (vnd->sc_size >= (32 * 64)) {
			/*
			 * Size must be at least 2048 DEV_BSIZE blocks
			 * (1M) in order to use this geometry.
			 */
			vnd->sc_geom.vng_secsize = DEV_BSIZE;
			vnd->sc_geom.vng_nsectors = 32;
			vnd->sc_geom.vng_ntracks = 64;
			vnd->sc_geom.vng_ncylinders = vnd->sc_size / (64 * 32);
		} else {
			vnd->sc_geom.vng_secsize = DEV_BSIZE;
			vnd->sc_geom.vng_nsectors = 1;
			vnd->sc_geom.vng_ntracks = 1;
			vnd->sc_geom.vng_ncylinders = vnd->sc_size;
		}
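
		/*
		 * Worked example (illustrative): a 64 MiB backing file
		 * gives sc_size = 131072 DEV_BSIZE blocks.  The default
		 * Adaptec-style geometry above (512-byte sectors, 32
		 * sectors, 64 tracks) packs 2048 blocks per cylinder,
		 * so vng_ncylinders = 131072 / 2048 = 64.
		 */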

		vnd_set_geometry(vnd);

		if (vio->vnd_flags & VNDIOF_READONLY) {
			vnd->sc_flags |= VNF_READONLY;
		}

		if ((error = vndsetcred(vnd, l->l_cred)) != 0)
			goto close_and_exit;

		vndthrottle(vnd, vnd->sc_vp);
		vio->vnd_osize = dbtob(vnd->sc_size);
		if (cmd != VNDIOCSET50)
			vio->vnd_size = dbtob(vnd->sc_size);
		vnd->sc_flags |= VNF_INITED;

		/* create the kernel thread, wait for it to be up */
		error = kthread_create(PRI_NONE, 0, NULL, vndthread, vnd,
		    &vnd->sc_kthread, "%s", device_xname(vnd->sc_dev));
		if (error)
			goto close_and_exit;
		while ((vnd->sc_flags & VNF_KTHREAD) == 0) {
			tsleep(&vnd->sc_kthread, PRIBIO, "vndthr", 0);
		}
#ifdef DEBUG
		if (vnddebug & VDB_INIT)
			printf("vndioctl: SET vp %p size 0x%lx %d/%d/%d/%d\n",
			    vnd->sc_vp, (unsigned long) vnd->sc_size,
			    vnd->sc_geom.vng_secsize,
			    vnd->sc_geom.vng_nsectors,
			    vnd->sc_geom.vng_ntracks,
			    vnd->sc_geom.vng_ncylinders);
#endif

		/* Attach the disk. */
		disk_attach(&vnd->sc_dkdev);

		/* Initialize the xfer and buffer pools. */
		pool_init(&vnd->sc_vxpool, sizeof(struct vndxfer), 0,
		    0, 0, "vndxpl", NULL, IPL_BIO);

		vndunlock(vnd);

		pathbuf_destroy(pb);

		/* Discover wedges on this disk */
		dkwedge_discover(&vnd->sc_dkdev);

		break;

 close_and_exit:
		(void) vn_close(vp, fflags, l->l_cred);
		pathbuf_destroy(pb);
 unlock_and_exit:
#ifdef VND_COMPRESSION
		/* free any allocated memory (for compressed file) */
		if (vnd->sc_comp_offsets) {
			free(vnd->sc_comp_offsets, M_DEVBUF);
			vnd->sc_comp_offsets = NULL;
		}
		if (vnd->sc_comp_buff) {
			free(vnd->sc_comp_buff, M_DEVBUF);
			vnd->sc_comp_buff = NULL;
		}
		if (vnd->sc_comp_decombuf) {
			free(vnd->sc_comp_decombuf, M_DEVBUF);
			vnd->sc_comp_decombuf = NULL;
		}
#endif /* VND_COMPRESSION */
		vndunlock(vnd);
		return error;

	case VNDIOCCLR50:
	case VNDIOCCLR:
		part = DISKPART(dev);
		pmask = (1 << part);
		force = (vio->vnd_flags & VNDIOF_FORCE) != 0;

		if ((error = vnddoclear(vnd, pmask, minor(dev), force)) != 0)
			return error;

		break;

	case DIOCWDINFO:
	case DIOCSDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCWDINFO:
	case ODIOCSDINFO:
#endif
	{
		struct disklabel *lp;

		if ((error = vndlock(vnd)) != 0)
			return error;

		vnd->sc_flags |= VNF_LABELLING;

#ifdef __HAVE_OLD_DISKLABEL
		if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
			memset(&newlabel, 0, sizeof newlabel);
			memcpy(&newlabel, data, sizeof (struct olddisklabel));
			lp = &newlabel;
		} else
#endif
		lp = (struct disklabel *)data;

		error = setdisklabel(vnd->sc_dkdev.dk_label,
		    lp, 0, vnd->sc_dkdev.dk_cpulabel);
		if (error == 0) {
			if (cmd == DIOCWDINFO
#ifdef __HAVE_OLD_DISKLABEL
			    || cmd == ODIOCWDINFO
#endif
			   )
				error = writedisklabel(VNDLABELDEV(dev),
				    vndstrategy, vnd->sc_dkdev.dk_label,
				    vnd->sc_dkdev.dk_cpulabel);
		}

		vnd->sc_flags &= ~VNF_LABELLING;

		vndunlock(vnd);

		if (error)
			return error;
		break;
	}

	case DIOCKLABEL:
		if (*(int *)data != 0)
			vnd->sc_flags |= VNF_KLABEL;
		else
			vnd->sc_flags &= ~VNF_KLABEL;
		break;

	case DIOCWLABEL:
		if (*(int *)data != 0)
			vnd->sc_flags |= VNF_WLABEL;
		else
			vnd->sc_flags &= ~VNF_WLABEL;
		break;

	case DIOCGDEFLABEL:
		vndgetdefaultlabel(vnd, (struct disklabel *)data);
		break;

#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDEFLABEL:
		vndgetdefaultlabel(vnd, &newlabel);
		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
			return ENOTTY;
		memcpy(data, &newlabel, sizeof (struct olddisklabel));
		break;
#endif

	case DIOCGSTRATEGY:
	    {
		struct disk_strategy *dks = (void *)data;

		/* No lock needed, never changed */
		strlcpy(dks->dks_name,
		    bufq_getstrategyname(vnd->sc_tab),
		    sizeof(dks->dks_name));
		dks->dks_paramlen = 0;
		break;
	    }
	case DIOCGCACHE:
	    {
		int *bits = (int *)data;
		*bits |= DKCACHE_READ | DKCACHE_WRITE;
		break;
	    }
	case DIOCCACHESYNC:
		vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY);
		error = VOP_FSYNC(vnd->sc_vp, vnd->sc_cred,
		    FSYNC_WAIT | FSYNC_DATAONLY | FSYNC_CACHE, 0, 0);
		VOP_UNLOCK(vnd->sc_vp);
		return error;

	default:
		return ENOTTY;
	}

	return 0;
}

/*
 * Duplicate the current process's credentials.  Since we are called only
 * as the result of a SET ioctl and only root can do that, any future access
 * to this "disk" is essentially as root.  Note that credentials may change
 * if some other uid can write directly to the mapped file (NFS).
 */
static int
vndsetcred(struct vnd_softc *vnd, kauth_cred_t cred)
{
	struct uio auio;
	struct iovec aiov;
	char *tmpbuf;
	int error;

	vnd->sc_cred = kauth_cred_dup(cred);
	tmpbuf = malloc(DEV_BSIZE, M_TEMP, M_WAITOK);

	/* XXX: Horrible kludge to establish credentials for NFS */
	aiov.iov_base = tmpbuf;
	aiov.iov_len = uimin(DEV_BSIZE, dbtob(vnd->sc_size));
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = 0;
	auio.uio_rw = UIO_READ;
	auio.uio_resid = aiov.iov_len;
	UIO_SETUP_SYSSPACE(&auio);
	vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY);
	error = VOP_READ(vnd->sc_vp, &auio, 0, vnd->sc_cred);
	if (error == 0) {
		/*
		 * Because vnd does all IO directly through the vnode
		 * we need to flush (at least) the buffer from the above
		 * VOP_READ from the buffer cache to prevent cache
		 * incoherencies.  Also, be careful to write dirty
		 * buffers back to stable storage.
		 */
		error = vinvalbuf(vnd->sc_vp, V_SAVE, vnd->sc_cred,
		    curlwp, 0, 0);
	}
	VOP_UNLOCK(vnd->sc_vp);

	free(tmpbuf, M_TEMP);
	return error;
}

/*
 * Set maxactive based on FS type
 */
static void
vndthrottle(struct vnd_softc *vnd, struct vnode *vp)
{

	if (vp->v_tag == VT_NFS)
		vnd->sc_maxactive = 2;
	else
		vnd->sc_maxactive = 8;

	if (vnd->sc_maxactive < 1)
		vnd->sc_maxactive = 1;
}

#if 0
static void
vndshutdown(void)
{
	struct vnd_softc *vnd;

	for (vnd = &vnd_softc[0]; vnd < &vnd_softc[numvnd]; vnd++)
		if (vnd->sc_flags & VNF_INITED)
			vndclear(vnd);
}
#endif

static void
vndclear(struct vnd_softc *vnd, int myminor)
{
	struct vnode *vp = vnd->sc_vp;
	int fflags = FREAD;
	int bmaj, cmaj, i, mn;
	int s;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndclear(%p): vp %p\n", vnd, vp);
#endif
	/* locate the major number */
	bmaj = bdevsw_lookup_major(&vnd_bdevsw);
	cmaj = cdevsw_lookup_major(&vnd_cdevsw);

	/* Nuke the vnodes for any open instances */
	for (i = 0; i < MAXPARTITIONS; i++) {
		mn = DISKMINOR(device_unit(vnd->sc_dev), i);
		if (mn != myminor) { /* XXX avoid killing our own vnode */
			vdevgone(bmaj, mn, mn, VBLK);
			vdevgone(cmaj, mn, mn, VCHR);
		}
	}

	if ((vnd->sc_flags & VNF_READONLY) == 0)
		fflags |= FWRITE;

	s = splbio();
	bufq_drain(vnd->sc_tab);
	splx(s);

	vnd->sc_flags |= VNF_VUNCONF;
	wakeup(&vnd->sc_tab);
	while (vnd->sc_flags & VNF_KTHREAD)
		tsleep(&vnd->sc_kthread, PRIBIO, "vnthr", 0);

#ifdef VND_COMPRESSION
	/* free the compressed file buffers */
	if (vnd->sc_flags & VNF_COMP) {
		if (vnd->sc_comp_offsets) {
			free(vnd->sc_comp_offsets, M_DEVBUF);
			vnd->sc_comp_offsets = NULL;
		}
		if (vnd->sc_comp_buff) {
			free(vnd->sc_comp_buff, M_DEVBUF);
			vnd->sc_comp_buff = NULL;
		}
		if (vnd->sc_comp_decombuf) {
			free(vnd->sc_comp_decombuf, M_DEVBUF);
			vnd->sc_comp_decombuf = NULL;
		}
	}
#endif /* VND_COMPRESSION */
	vnd->sc_flags &=
	    ~(VNF_INITED | VNF_READONLY | VNF_KLABEL | VNF_VLABEL
	      | VNF_VUNCONF | VNF_COMP | VNF_CLEARING);
	if (vp == NULL)
		panic("vndclear: null vp");
	(void) vn_close(vp, fflags, vnd->sc_cred);
	kauth_cred_free(vnd->sc_cred);
	vnd->sc_vp = NULL;
	vnd->sc_cred = NULL;
	vnd->sc_size = 0;
}

static int
vndsize(dev_t dev)
{
	struct vnd_softc *sc;
	struct disklabel *lp;
	int part, unit, omask;
	int size;

	unit = vndunit(dev);
	sc = device_lookup_private(&vnd_cd, unit);
	if (sc == NULL)
		return -1;

	if ((sc->sc_flags & VNF_INITED) == 0)
		return -1;

	part = DISKPART(dev);
	omask = sc->sc_dkdev.dk_openmask & (1 << part);
	lp = sc->sc_dkdev.dk_label;

	if (omask == 0 && vndopen(dev, 0, S_IFBLK, curlwp))	/* XXX */
		return -1;

	if (lp->d_partitions[part].p_fstype != FS_SWAP)
		size = -1;
	else
		size = lp->d_partitions[part].p_size *
		    (lp->d_secsize / DEV_BSIZE);

	if (omask == 0 && vndclose(dev, 0, S_IFBLK, curlwp))	/* XXX */
		return -1;

	return size;
}

static int
vnddump(dev_t dev, daddr_t blkno, void *va,
    size_t size)
{

	/* Not implemented. */
	return ENXIO;
}

static void
vndgetdefaultlabel(struct vnd_softc *sc, struct disklabel *lp)
{
	struct vndgeom *vng = &sc->sc_geom;
	struct partition *pp;
	unsigned spb;

	memset(lp, 0, sizeof(*lp));

	spb = vng->vng_secsize / DEV_BSIZE;
	if (sc->sc_size / spb > UINT32_MAX)
		lp->d_secperunit = UINT32_MAX;
	else
		lp->d_secperunit = sc->sc_size / spb;
	lp->d_secsize = vng->vng_secsize;
	lp->d_nsectors = vng->vng_nsectors;
	lp->d_ntracks = vng->vng_ntracks;
	lp->d_ncylinders = vng->vng_ncylinders;
	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;

	strncpy(lp->d_typename, "vnd", sizeof(lp->d_typename));
	lp->d_type = DKTYPE_VND;
	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
	lp->d_rpm = 3600;
	lp->d_interleave = 1;
	lp->d_flags = 0;

	pp = &lp->d_partitions[RAW_PART];
	pp->p_offset = 0;
	pp->p_size = lp->d_secperunit;
	pp->p_fstype = FS_UNUSED;
	lp->d_npartitions = RAW_PART + 1;

	lp->d_magic = DISKMAGIC;
	lp->d_magic2 = DISKMAGIC;
	lp->d_checksum = dkcksum(lp);
}

/*
 * Read the disklabel from a vnd.  If one is not present, create a fake one.
 */
static void
vndgetdisklabel(dev_t dev, struct vnd_softc *sc)
{
	const char *errstring;
	struct disklabel *lp = sc->sc_dkdev.dk_label;
	struct cpu_disklabel *clp = sc->sc_dkdev.dk_cpulabel;
	int i;

	memset(clp, 0, sizeof(*clp));

	vndgetdefaultlabel(sc, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(VNDLABELDEV(dev), vndstrategy, lp, clp);
	if (errstring) {
		/*
		 * Lack of disklabel is common, but we print the warning
		 * anyway, since it might contain other useful information.
		 */
		aprint_normal_dev(sc->sc_dev, "%s\n", errstring);

		/*
		 * For historical reasons, if there's no disklabel
		 * present, all partitions must be FS_BSDFFS and
		 * occupy the entire disk.
		 */
		for (i = 0; i < MAXPARTITIONS; i++) {
			/*
			 * Don't wipe out a port-specific hack (such as
			 * the DOS partition hack of the i386 port).
			 */
			if (lp->d_partitions[i].p_size != 0)
				continue;

			lp->d_partitions[i].p_size = lp->d_secperunit;
			lp->d_partitions[i].p_offset = 0;
			lp->d_partitions[i].p_fstype = FS_BSDFFS;
		}

		strncpy(lp->d_packname, "default label",
		    sizeof(lp->d_packname));

		lp->d_npartitions = MAXPARTITIONS;
		lp->d_checksum = dkcksum(lp);
	}
}

/*
 * Wait interruptibly for an exclusive lock.
 *
 * XXX
 * Several drivers do this; it should be abstracted and made MP-safe.
 */
static int
vndlock(struct vnd_softc *sc)
{
	int error;

	while ((sc->sc_flags & VNF_LOCKED) != 0) {
		sc->sc_flags |= VNF_WANTED;
		if ((error = tsleep(sc, PRIBIO | PCATCH, "vndlck", 0)) != 0)
			return error;
	}
	sc->sc_flags |= VNF_LOCKED;
	return 0;
}

/*
 * Unlock and wake up any waiters.
 */
static void
vndunlock(struct vnd_softc *sc)
{

	sc->sc_flags &= ~VNF_LOCKED;
	if ((sc->sc_flags & VNF_WANTED) != 0) {
		sc->sc_flags &= ~VNF_WANTED;
		wakeup(sc);
	}
}
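
/*
 * A minimal sketch of the MP-safe shape the XXX above asks for, using
 * mutex(9) and condvar(9).  The sc_iflock and sc_iflock_cv fields are
 * hypothetical (struct vnd_softc has no such members); this is not the
 * driver's locking, only an illustration of the pattern.
 */
#if 0
static int
vndlock_mpsafe(struct vnd_softc *sc)
{
	int error = 0;

	mutex_enter(&sc->sc_iflock);
	while ((sc->sc_flags & VNF_LOCKED) != 0) {
		error = cv_wait_sig(&sc->sc_iflock_cv, &sc->sc_iflock);
		if (error)
			break;
	}
	if (error == 0)
		sc->sc_flags |= VNF_LOCKED;
	mutex_exit(&sc->sc_iflock);
	return error;
}

static void
vndunlock_mpsafe(struct vnd_softc *sc)
{

	mutex_enter(&sc->sc_iflock);
	sc->sc_flags &= ~VNF_LOCKED;
	cv_broadcast(&sc->sc_iflock_cv);
	mutex_exit(&sc->sc_iflock);
}
#endif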

#ifdef VND_COMPRESSION
/* compressed file read */
static void
compstrategy(struct buf *bp, off_t bn)
{
	int error;
	int unit = vndunit(bp->b_dev);
	struct vnd_softc *vnd =
	    device_lookup_private(&vnd_cd, unit);
	u_int32_t comp_block;
	struct uio auio;
	char *addr;
	int s;

	/* set up constants for data move */
	auio.uio_rw = UIO_READ;
	UIO_SETUP_SYSSPACE(&auio);

	/* read, and transfer the data */
	addr = bp->b_data;
	bp->b_resid = bp->b_bcount;
	s = splbio();
	while (bp->b_resid > 0) {
		unsigned length;
		size_t length_in_buffer;
		u_int32_t offset_in_buffer;
		struct iovec aiov;

		/* calculate the compressed block number */
		comp_block = bn / (off_t)vnd->sc_comp_blksz;

		/* check for good block number */
		if (comp_block >= vnd->sc_comp_numoffs) {
			bp->b_error = EINVAL;
			splx(s);
			return;
		}

		/* read in the compressed block, if not in buffer */
		if (comp_block != vnd->sc_comp_buffblk) {
			length = vnd->sc_comp_offsets[comp_block + 1] -
			    vnd->sc_comp_offsets[comp_block];
			vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY);
			error = vn_rdwr(UIO_READ, vnd->sc_vp, vnd->sc_comp_buff,
			    length, vnd->sc_comp_offsets[comp_block],
			    UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, vnd->sc_cred,
			    NULL, NULL);
			if (error) {
				bp->b_error = error;
				VOP_UNLOCK(vnd->sc_vp);
				splx(s);
				return;
			}
			/* uncompress the buffer */
			vnd->sc_comp_stream.next_in = vnd->sc_comp_buff;
			vnd->sc_comp_stream.avail_in = length;
			vnd->sc_comp_stream.next_out = vnd->sc_comp_decombuf;
			vnd->sc_comp_stream.avail_out = vnd->sc_comp_blksz;
			inflateReset(&vnd->sc_comp_stream);
			error = inflate(&vnd->sc_comp_stream, Z_FINISH);
			if (error != Z_STREAM_END) {
				if (vnd->sc_comp_stream.msg)
					aprint_normal_dev(vnd->sc_dev,
					    "compressed file, %s\n",
					    vnd->sc_comp_stream.msg);
				bp->b_error = EBADMSG;
				VOP_UNLOCK(vnd->sc_vp);
				splx(s);
				return;
			}
			vnd->sc_comp_buffblk = comp_block;
			VOP_UNLOCK(vnd->sc_vp);
		}

		/* transfer the usable uncompressed data */
		offset_in_buffer = bn % (off_t)vnd->sc_comp_blksz;
		length_in_buffer = vnd->sc_comp_blksz - offset_in_buffer;
		if (length_in_buffer > bp->b_resid)
			length_in_buffer = bp->b_resid;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		aiov.iov_base = addr;
		aiov.iov_len = length_in_buffer;
		auio.uio_resid = aiov.iov_len;
		auio.uio_offset = 0;
		error = uiomove(vnd->sc_comp_decombuf + offset_in_buffer,
		    length_in_buffer, &auio);
		if (error) {
			bp->b_error = error;
			splx(s);
			return;
		}

		bn += length_in_buffer;
		addr += length_in_buffer;
		bp->b_resid -= length_in_buffer;
	}
	splx(s);
}

/* compression memory allocation routines */
static void *
vnd_alloc(void *aux, u_int items, u_int siz)
{
	return malloc(items * siz, M_TEMP, M_NOWAIT);
}

static void
vnd_free(void *aux, void *ptr)
{
	free(ptr, M_TEMP);
}
#endif /* VND_COMPRESSION */

static void
vnd_set_geometry(struct vnd_softc *vnd)
{
	struct disk_geom *dg = &vnd->sc_dkdev.dk_geom;
	unsigned spb;

	memset(dg, 0, sizeof(*dg));

	spb = vnd->sc_geom.vng_secsize / DEV_BSIZE;
	dg->dg_secperunit = vnd->sc_size / spb;
	dg->dg_secsize = vnd->sc_geom.vng_secsize;
	dg->dg_nsectors = vnd->sc_geom.vng_nsectors;
	dg->dg_ntracks = vnd->sc_geom.vng_ntracks;
	dg->dg_ncylinders = vnd->sc_geom.vng_ncylinders;

#ifdef DEBUG
	if (vnddebug & VDB_LABEL) {
		printf("dg->dg_secperunit: %" PRId64 "\n", dg->dg_secperunit);
		printf("dg->dg_ncylinders: %u\n", dg->dg_ncylinders);
	}
#endif
	disk_set_info(vnd->sc_dev, &vnd->sc_dkdev, NULL);
}

#ifdef VND_COMPRESSION
#define VND_DEPENDS "zlib"
#else
#define VND_DEPENDS NULL
#endif

MODULE(MODULE_CLASS_DRIVER, vnd, VND_DEPENDS);

#ifdef _MODULE
int vnd_bmajor = -1, vnd_cmajor = -1;

CFDRIVER_DECL(vnd, DV_DISK, NULL);
#endif

static int
vnd_modcmd(modcmd_t cmd, void *arg)
{
	int error = 0;

	switch (cmd) {
	case MODULE_CMD_INIT:
#ifdef _MODULE
		/*
		 * Attach the {b,c}devsw's
		 */
		error = devsw_attach("vnd", &vnd_bdevsw, &vnd_bmajor,
		    &vnd_cdevsw, &vnd_cmajor);
		if (error) {
#ifdef DIAGNOSTIC
			aprint_error("%s: unable to attach %s devsw, "
			    "error %d", __func__, vnd_cd.cd_name, error);
#endif
			break;
		}

		error = config_cfdriver_attach(&vnd_cd);
		if (error) {
			devsw_detach(&vnd_bdevsw, &vnd_cdevsw);
			break;
		}

		error = config_cfattach_attach(vnd_cd.cd_name, &vnd_ca);
		if (error) {
			config_cfdriver_detach(&vnd_cd);
			devsw_detach(&vnd_bdevsw, &vnd_cdevsw);
#ifdef DIAGNOSTIC
			aprint_error("%s: unable to register cfattach for "
			    "%s, error %d", __func__, vnd_cd.cd_name, error);
#endif
			break;
		}
#endif
		break;

	case MODULE_CMD_FINI:
#ifdef _MODULE
		/*
		 * Remove device from autoconf database
		 */
		error = config_cfattach_detach(vnd_cd.cd_name, &vnd_ca);
		if (error) {
#ifdef DIAGNOSTIC
			aprint_error("%s: failed to detach %s cfattach, "
			    "error %d\n", __func__, vnd_cd.cd_name, error);
#endif
			break;
		}
		error = config_cfdriver_detach(&vnd_cd);
		if (error) {
			(void)config_cfattach_attach(vnd_cd.cd_name, &vnd_ca);
#ifdef DIAGNOSTIC
			aprint_error("%s: failed to detach %s cfdriver, "
			    "error %d\n", __func__, vnd_cd.cd_name, error);
#endif
			break;
		}
		/*
		 * Remove {b,c}devsw's
		 */
		devsw_detach(&vnd_bdevsw, &vnd_cdevsw);

#endif
		break;

	case MODULE_CMD_STAT:
		return ENOTTY;

	default:
		return ENOTTY;
	}

	return error;
}