/*	$NetBSD: vnd.c,v 1.272 2019/03/01 11:06:56 pgoyette Exp $	*/

/*-
 * Copyright (c) 1996, 1997, 1998, 2008 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vn.c 1.13 94/04/02$
 *
 *	@(#)vn.c	8.9 (Berkeley) 5/14/95
 */

/*
 * Vnode disk driver.
 *
 * Block/character interface to a vnode.  Allows one to treat a file
 * as a disk (e.g. build a filesystem in it, mount it, etc.).
 *
 * NOTE 1: If the vnode supports the VOP_BMAP and VOP_STRATEGY operations,
 * this uses them to avoid distorting the local buffer cache.  If those
 * block-level operations are not available, this falls back to the regular
 * read and write calls.  Using these may distort the cache in some cases,
 * but it is better to have the driver working than to prevent it from
 * working on file systems where the block-level operations are not
 * implemented for whatever reason.
 *
 * NOTE 2: There is a security issue involved with this driver.
 * Once mounted, all access to the contents of the "mapped" file via
 * the special file is controlled by the permissions on the special
 * file; the protection of the mapped file is ignored (effectively,
 * by using root credentials in all transactions).
 *
 * NOTE 3: Doesn't interact with leases, should it?
 */
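
/*
 * Illustrative sketch only (not part of the driver): a unit is configured
 * from userland through the VNDIOCSET/VNDIOCCLR ioctls declared in
 * <dev/vndvar.h>, which is what vnconfig(8) does.  The device name below
 * is an assumption; the raw partition letter varies by platform.
 *
 *	struct vnd_ioctl vio;
 *	int fd = open("/dev/rvnd0d", O_RDWR);
 *
 *	memset(&vio, 0, sizeof(vio));
 *	vio.vnd_file = "/var/tmp/disk.img";	// backing file to map
 *	if (ioctl(fd, VNDIOCSET, &vio) == -1)	// configure the unit
 *		err(1, "VNDIOCSET");
 *	...
 *	if (ioctl(fd, VNDIOCCLR, &vio) == -1)	// unconfigure it again
 *		err(1, "VNDIOCCLR");
 */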

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vnd.c,v 1.272 2019/03/01 11:06:56 pgoyette Exp $");

#if defined(_KERNEL_OPT)
#include "opt_vnd.h"
#include "opt_compat_netbsd.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/proc.h>
#include <sys/kthread.h>
#include <sys/errno.h>
#include <sys/buf.h>
#include <sys/bufq.h>
#include <sys/malloc.h>
#include <sys/ioctl.h>
#include <sys/disklabel.h>
#include <sys/device.h>
#include <sys/disk.h>
#include <sys/stat.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/fstrans.h>
#include <sys/file.h>
#include <sys/uio.h>
#include <sys/conf.h>
#include <sys/kauth.h>
#include <sys/module.h>
#include <sys/compat_stub.h>

#include <net/zlib.h>

#include <miscfs/genfs/genfs.h>
#include <miscfs/specfs/specdev.h>

#include <dev/dkvar.h>
#include <dev/vndvar.h>

#include "ioconf.h"

#if defined(VNDDEBUG) && !defined(DEBUG)
#define DEBUG
#endif

#ifdef DEBUG
int dovndcluster = 1;
#define VDB_FOLLOW	0x01
#define VDB_INIT	0x02
#define VDB_IO		0x04
#define VDB_LABEL	0x08
int vnddebug = 0;
#endif

#define vndunit(x)	DISKUNIT(x)

struct vndxfer {
	struct buf vx_buf;
	struct vnd_softc *vx_vnd;
};
#define	VND_BUFTOXFER(bp)	((struct vndxfer *)(void *)bp)

#define VND_GETXFER(vnd)	pool_get(&(vnd)->sc_vxpool, PR_WAITOK)
#define VND_PUTXFER(vnd, vx)	pool_put(&(vnd)->sc_vxpool, (vx))

#define VNDLABELDEV(dev) \
    (MAKEDISKDEV(major((dev)), vndunit((dev)), RAW_PART))

#define	VND_MAXPENDING(vnd)	((vnd)->sc_maxactive * 4)
#define	VND_MAXPAGES(vnd)	(1024 * 1024 / PAGE_SIZE)


static void	vndclear(struct vnd_softc *, int);
static int	vnddoclear(struct vnd_softc *, int, int, bool);
static int	vndsetcred(struct vnd_softc *, kauth_cred_t);
static void	vndthrottle(struct vnd_softc *, struct vnode *);
static void	vndiodone(struct buf *);
#if 0
static void	vndshutdown(void);
#endif

static void	vndgetdefaultlabel(struct vnd_softc *, struct disklabel *);
static void	vndgetdisklabel(dev_t, struct vnd_softc *);

static int	vndlock(struct vnd_softc *);
static void	vndunlock(struct vnd_softc *);
#ifdef VND_COMPRESSION
static void	compstrategy(struct buf *, off_t);
static void	*vnd_alloc(void *, u_int, u_int);
static void	vnd_free(void *, void *);
#endif /* VND_COMPRESSION */

static void	vndthread(void *);
static bool	vnode_has_op(const struct vnode *, int);
static void	handle_with_rdwr(struct vnd_softc *, const struct buf *,
		    struct buf *);
static void	handle_with_strategy(struct vnd_softc *, const struct buf *,
		    struct buf *);
static void	vnd_set_geometry(struct vnd_softc *);

static dev_type_open(vndopen);
static dev_type_close(vndclose);
static dev_type_read(vndread);
static dev_type_write(vndwrite);
static dev_type_ioctl(vndioctl);
static dev_type_strategy(vndstrategy);
static dev_type_dump(vnddump);
static dev_type_size(vndsize);

const struct bdevsw vnd_bdevsw = {
	.d_open = vndopen,
	.d_close = vndclose,
	.d_strategy = vndstrategy,
	.d_ioctl = vndioctl,
	.d_dump = vnddump,
	.d_psize = vndsize,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};

const struct cdevsw vnd_cdevsw = {
	.d_open = vndopen,
	.d_close = vndclose,
	.d_read = vndread,
	.d_write = vndwrite,
	.d_ioctl = vndioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};

static int	vnd_match(device_t, cfdata_t, void *);
static void	vnd_attach(device_t, device_t, void *);
static int	vnd_detach(device_t, int);

CFATTACH_DECL3_NEW(vnd, sizeof(struct vnd_softc),
    vnd_match, vnd_attach, vnd_detach, NULL, NULL, NULL, DVF_DETACH_SHUTDOWN);

static struct vnd_softc	*vnd_spawn(int);
int	vnd_destroy(device_t);

static struct dkdriver vnddkdriver = {
	.d_strategy = vndstrategy,
	.d_minphys = minphys
};
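
/*
 * Pseudo-device autoconf glue.  vndattach() is called once, at boot or
 * module load, to register our cfattach; individual units are created
 * lazily by vnd_spawn() on first open.
 */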
void
vndattach(int num)
{
	int error;

	error = config_cfattach_attach(vnd_cd.cd_name, &vnd_ca);
	if (error)
		aprint_error("%s: unable to register cfattach, error = %d\n",
		    vnd_cd.cd_name, error);
}

static int
vnd_match(device_t self, cfdata_t cfdata, void *aux)
{

	return 1;
}

static void
vnd_attach(device_t parent, device_t self, void *aux)
{
	struct vnd_softc *sc = device_private(self);

	sc->sc_dev = self;
	sc->sc_comp_offsets = NULL;
	sc->sc_comp_buff = NULL;
	sc->sc_comp_decombuf = NULL;
	bufq_alloc(&sc->sc_tab, "disksort", BUFQ_SORT_RAWBLOCK);
	disk_init(&sc->sc_dkdev, device_xname(self), &vnddkdriver);
	if (!pmf_device_register(self, NULL, NULL))
		aprint_error_dev(self, "couldn't establish power handler\n");
}

static int
vnd_detach(device_t self, int flags)
{
	int error;
	struct vnd_softc *sc = device_private(self);

	if (sc->sc_flags & VNF_INITED) {
		error = vnddoclear(sc, 0, -1, (flags & DETACH_FORCE) != 0);
		if (error != 0)
			return error;
	}

	pmf_device_deregister(self);
	bufq_free(sc->sc_tab);
	disk_destroy(&sc->sc_dkdev);

	return 0;
}

static struct vnd_softc *
vnd_spawn(int unit)
{
	cfdata_t cf;

	cf = malloc(sizeof(*cf), M_DEVBUF, M_WAITOK);
	cf->cf_name = vnd_cd.cd_name;
	cf->cf_atname = vnd_cd.cd_name;
	cf->cf_unit = unit;
	cf->cf_fstate = FSTATE_STAR;

	return device_private(config_attach_pseudo(cf));
}

int
vnd_destroy(device_t dev)
{
	int error;
	cfdata_t cf;

	cf = device_cfdata(dev);
	error = config_detach(dev, DETACH_QUIET);
	if (error)
		return error;
	free(cf, M_DEVBUF);
	return 0;
}
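
/*
 * Open a partition of the unit, creating the device instance on first
 * use and reading in the disklabel once the unit is configured.
 */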
static int
vndopen(dev_t dev, int flags, int mode, struct lwp *l)
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;
	int error = 0, part, pmask;
	struct disklabel *lp;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndopen(0x%"PRIx64", 0x%x, 0x%x, %p)\n", dev, flags, mode, l);
#endif
	sc = device_lookup_private(&vnd_cd, unit);
	if (sc == NULL) {
		sc = vnd_spawn(unit);
		if (sc == NULL)
			return ENOMEM;

		/* compatibility, keep disklabel after close */
		sc->sc_flags = VNF_KLABEL;
	}

	if ((error = vndlock(sc)) != 0)
		return error;

	mutex_enter(&sc->sc_dkdev.dk_openlock);

	if ((sc->sc_flags & VNF_CLEARING) != 0) {
		error = ENXIO;
		goto done;
	}

	lp = sc->sc_dkdev.dk_label;

	part = DISKPART(dev);
	pmask = (1 << part);

	if (sc->sc_dkdev.dk_nwedges != 0 && part != RAW_PART) {
		error = EBUSY;
		goto done;
	}

	if (sc->sc_flags & VNF_INITED) {
		if ((sc->sc_dkdev.dk_openmask & ~(1<<RAW_PART)) != 0) {
			/*
			 * If any non-raw partition is open, but the disk
			 * has been invalidated, disallow further opens.
			 */
			if ((sc->sc_flags & VNF_VLABEL) == 0) {
				error = EIO;
				goto done;
			}
		} else {
			/*
			 * Load the partition info if not already loaded.
			 */
			if ((sc->sc_flags & VNF_VLABEL) == 0) {
				sc->sc_flags |= VNF_VLABEL;
				vndgetdisklabel(dev, sc);
			}
		}
	}

	/* Check that the partition exists. */
	if (part != RAW_PART) {
		if (((sc->sc_flags & VNF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
		    (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			goto done;
		}
	}

	/* Prevent our unit from being unconfigured while open. */
	switch (mode) {
	case S_IFCHR:
		sc->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		sc->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}
	sc->sc_dkdev.dk_openmask =
	    sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask;

done:
	mutex_exit(&sc->sc_dkdev.dk_openlock);
	vndunlock(sc);
	return error;
}
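
/*
 * Close a partition.  When the last partition is closed, drop the
 * in-core disklabel unless it is pinned with DIOCKLABEL, and destroy
 * the device instance again if the unit was never configured.
 */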
static int
vndclose(dev_t dev, int flags, int mode, struct lwp *l)
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;
	int error = 0, part;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndclose(0x%"PRIx64", 0x%x, 0x%x, %p)\n", dev, flags, mode, l);
#endif
	sc = device_lookup_private(&vnd_cd, unit);
	if (sc == NULL)
		return ENXIO;

	if ((error = vndlock(sc)) != 0)
		return error;

	mutex_enter(&sc->sc_dkdev.dk_openlock);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (mode) {
	case S_IFCHR:
		sc->sc_dkdev.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		sc->sc_dkdev.dk_bopenmask &= ~(1 << part);
		break;
	}
	sc->sc_dkdev.dk_openmask =
	    sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask;

	/* are we the last opener? */
	if (sc->sc_dkdev.dk_openmask == 0) {
		if ((sc->sc_flags & VNF_KLABEL) == 0)
			sc->sc_flags &= ~VNF_VLABEL;
	}

	mutex_exit(&sc->sc_dkdev.dk_openlock);

	vndunlock(sc);

	if ((sc->sc_flags & VNF_INITED) == 0) {
		if ((error = vnd_destroy(sc->sc_dev)) != 0) {
			aprint_error_dev(sc->sc_dev,
			    "unable to detach instance\n");
			return error;
		}
	}

	return 0;
}

/*
 * Queue the request, and wake up the kernel thread to handle it.
 */
static void
vndstrategy(struct buf *bp)
{
	int unit = vndunit(bp->b_dev);
	struct vnd_softc *vnd =
	    device_lookup_private(&vnd_cd, unit);
	struct disklabel *lp;
	daddr_t blkno;
	int s = splbio();

	if (vnd == NULL) {
		bp->b_error = ENXIO;
		goto done;
	}
	lp = vnd->sc_dkdev.dk_label;

	if ((vnd->sc_flags & VNF_INITED) == 0) {
		bp->b_error = ENXIO;
		goto done;
	}

	/*
	 * The transfer must be a whole number of blocks.
	 */
	if ((bp->b_bcount % lp->d_secsize) != 0) {
		bp->b_error = EINVAL;
		goto done;
	}

	/*
	 * check if we're read-only.
	 */
	if ((vnd->sc_flags & VNF_READONLY) && !(bp->b_flags & B_READ)) {
		bp->b_error = EACCES;
		goto done;
	}

	/* If it's a nil transfer, wake up the top half now. */
	if (bp->b_bcount == 0) {
		goto done;
	}

	/*
	 * Do bounds checking and adjust the transfer.  If there's an error,
	 * the bounds check will flag that for us.
	 */
	if (DISKPART(bp->b_dev) == RAW_PART) {
		if (bounds_check_with_mediasize(bp, DEV_BSIZE,
		    vnd->sc_size) <= 0)
			goto done;
	} else {
		if (bounds_check_with_label(&vnd->sc_dkdev,
		    bp, vnd->sc_flags & (VNF_WLABEL|VNF_LABELLING)) <= 0)
			goto done;
	}

	/*
	 * Put the block number in terms of the logical blocksize
	 * of the "device".
	 */

	blkno = bp->b_blkno / (lp->d_secsize / DEV_BSIZE);

	/*
	 * Translate the partition-relative block number to an absolute.
	 */
	if (DISKPART(bp->b_dev) != RAW_PART) {
		struct partition *pp;

		pp = &vnd->sc_dkdev.dk_label->d_partitions[
		    DISKPART(bp->b_dev)];
		blkno += pp->p_offset;
	}
	bp->b_rawblkno = blkno;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndstrategy(%p): unit %d\n", bp, unit);
#endif
	if ((vnd->sc_flags & VNF_USE_VN_RDWR)) {
		KASSERT(vnd->sc_pending >= 0 &&
		    vnd->sc_pending <= VND_MAXPENDING(vnd));
		while (vnd->sc_pending == VND_MAXPENDING(vnd))
			tsleep(&vnd->sc_pending, PRIBIO, "vndpc", 0);
		vnd->sc_pending++;
	}
	bufq_put(vnd->sc_tab, bp);
	wakeup(&vnd->sc_tab);
	splx(s);
	return;

done:
	bp->b_resid = bp->b_bcount;
	biodone(bp);
	splx(s);
}
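
/*
 * Check whether the backing vnode provides both VOP_BMAP and VOP_STRATEGY;
 * only then can the worker thread use the block-level path instead of
 * vn_rdwr().
 */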
static bool
vnode_has_strategy(struct vnd_softc *vnd)
{
	return vnode_has_op(vnd->sc_vp, VOFFSET(vop_bmap)) &&
	    vnode_has_op(vnd->sc_vp, VOFFSET(vop_strategy));
}

/* Verify that I/O requests cannot be smaller than the
 * smallest I/O size supported by the backend.
 */
static bool
vnode_has_large_blocks(struct vnd_softc *vnd)
{
	u_int32_t vnd_secsize, iosize;

	iosize = vnd->sc_iosize;
	vnd_secsize = vnd->sc_geom.vng_secsize;

	return vnd_secsize % iosize != 0;
}

/* XXX this function needs a reliable check to detect
 * sparse files.  Otherwise, bmap/strategy may be used
 * and fail on non-allocated blocks.  VOP_READ/VOP_WRITE
 * works on sparse files.
 */
#if notyet
static bool
vnode_strategy_probe(struct vnd_softc *vnd)
{
	int error;
	daddr_t nbn;

	if (!vnode_has_strategy(vnd))
		return false;

	if (vnode_has_large_blocks(vnd))
		return false;

	/* Convert the first logical block number to its
	 * physical block number.
	 */
	error = 0;
	vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY);
	error = VOP_BMAP(vnd->sc_vp, 0, NULL, &nbn, NULL);
	VOP_UNLOCK(vnd->sc_vp);

	/* Test if that worked. */
	if (error == 0 && (long)nbn == -1)
		return false;

	return true;
}
#endif
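
/*
 * Per-unit worker thread, created by the VNDIOCSET ioctl.  It picks
 * requests off sc_tab and services each one through either the
 * bmap/strategy fast path or vn_rdwr(), until VNF_VUNCONF asks it
 * to exit.
 */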
static void
vndthread(void *arg)
{
	struct vnd_softc *vnd = arg;
	int s;

	/* Determine whether we can *use* VOP_BMAP and VOP_STRATEGY to
	 * directly access the backing vnode.  If we can, use these two
	 * operations to avoid messing with the local buffer cache.
	 * Otherwise fall back to regular VOP_READ/VOP_WRITE operations
	 * which are guaranteed to work with any file system. */
	if ((vnd->sc_flags & VNF_USE_VN_RDWR) == 0 &&
	    ! vnode_has_strategy(vnd))
		vnd->sc_flags |= VNF_USE_VN_RDWR;

	/* VOP_STRATEGY can only be used if the backing vnode allows
	 * access to blocks as small as defined by the vnd geometry.
	 */
	if ((vnd->sc_flags & VNF_USE_VN_RDWR) == 0 &&
	    vnode_has_large_blocks(vnd))
		vnd->sc_flags |= VNF_USE_VN_RDWR;

#ifdef DEBUG
	if (vnddebug & VDB_INIT)
		printf("vndthread: vp %p, %s\n", vnd->sc_vp,
		    (vnd->sc_flags & VNF_USE_VN_RDWR) == 0 ?
		    "using bmap/strategy operations" :
		    "using read/write operations");
#endif

	s = splbio();
	vnd->sc_flags |= VNF_KTHREAD;
	wakeup(&vnd->sc_kthread);

	/*
	 * Dequeue requests and serve them depending on the available
	 * vnode operations.
	 */
	while ((vnd->sc_flags & VNF_VUNCONF) == 0) {
		struct vndxfer *vnx;
		struct buf *obp;
		struct buf *bp;

		obp = bufq_get(vnd->sc_tab);
		if (obp == NULL) {
			tsleep(&vnd->sc_tab, PRIBIO, "vndbp", 0);
			continue;
		}
		if ((vnd->sc_flags & VNF_USE_VN_RDWR)) {
			KASSERT(vnd->sc_pending > 0 &&
			    vnd->sc_pending <= VND_MAXPENDING(vnd));
			if (vnd->sc_pending-- == VND_MAXPENDING(vnd))
				wakeup(&vnd->sc_pending);
		}
		splx(s);
#ifdef DEBUG
		if (vnddebug & VDB_FOLLOW)
			printf("vndthread(%p)\n", obp);
#endif

		if (vnd->sc_vp->v_mount == NULL) {
			obp->b_error = ENXIO;
			goto done;
		}
#ifdef VND_COMPRESSION
		/* handle a compressed read */
		if ((obp->b_flags & B_READ) != 0 && (vnd->sc_flags & VNF_COMP)) {
			off_t bn;

			/* Convert to a byte offset within the file. */
			bn = obp->b_rawblkno *
			    vnd->sc_dkdev.dk_label->d_secsize;

			compstrategy(obp, bn);
			goto done;
		}
#endif /* VND_COMPRESSION */

		/*
		 * Allocate a header for this transfer and link it to the
		 * buffer
		 */
		s = splbio();
		vnx = VND_GETXFER(vnd);
		splx(s);
		vnx->vx_vnd = vnd;

		s = splbio();
		while (vnd->sc_active >= vnd->sc_maxactive) {
			tsleep(&vnd->sc_tab, PRIBIO, "vndac", 0);
		}
		vnd->sc_active++;
		splx(s);

		/* Instrumentation. */
		disk_busy(&vnd->sc_dkdev);

		bp = &vnx->vx_buf;
		buf_init(bp);
		bp->b_flags = (obp->b_flags & B_READ);
		bp->b_oflags = obp->b_oflags;
		bp->b_cflags = obp->b_cflags;
		bp->b_iodone = vndiodone;
		bp->b_private = obp;
		bp->b_vp = vnd->sc_vp;
		bp->b_objlock = bp->b_vp->v_interlock;
		bp->b_data = obp->b_data;
		bp->b_bcount = obp->b_bcount;
		BIO_COPYPRIO(bp, obp);

		/* Make sure the request succeeds while suspending this fs. */
		fstrans_start_lazy(vnd->sc_vp->v_mount);

		/* Handle the request using the appropriate operations. */
		if ((vnd->sc_flags & VNF_USE_VN_RDWR) == 0)
			handle_with_strategy(vnd, obp, bp);
		else
			handle_with_rdwr(vnd, obp, bp);

		fstrans_done(vnd->sc_vp->v_mount);

		s = splbio();
		continue;

done:
		biodone(obp);
		s = splbio();
	}

	vnd->sc_flags &= (~VNF_KTHREAD | VNF_VUNCONF);
	wakeup(&vnd->sc_kthread);
	splx(s);
	kthread_exit(0);
}

/*
 * Checks if the given vnode supports the requested operation.
 * The operation is specified by the offset returned by VOFFSET.
 *
 * XXX The test below used to determine this is quite fragile
 * because it relies on the file system to use genfs to specify
 * unimplemented operations.  There might be another way to do
 * it more cleanly.
 */
static bool
vnode_has_op(const struct vnode *vp, int opoffset)
{
	int (*defaultp)(void *);
	int (*opp)(void *);

	defaultp = vp->v_op[VOFFSET(vop_default)];
	opp = vp->v_op[opoffset];

	return opp != defaultp && opp != genfs_eopnotsupp &&
	    opp != genfs_badop && opp != genfs_nullop;
}

/*
 * Handles the read/write request given in 'bp' using the vnode's VOP_READ
 * and VOP_WRITE operations.
 *
 * 'obp' is a pointer to the original request fed to the vnd device.
 */
static void
handle_with_rdwr(struct vnd_softc *vnd, const struct buf *obp, struct buf *bp)
{
	bool doread;
	off_t offset;
	size_t len, resid;
	struct vnode *vp;

	doread = bp->b_flags & B_READ;
	offset = obp->b_rawblkno * vnd->sc_dkdev.dk_label->d_secsize;
	len = bp->b_bcount;
	vp = vnd->sc_vp;

#if defined(DEBUG)
	if (vnddebug & VDB_IO)
		printf("vnd (rdwr): vp %p, %s, rawblkno 0x%" PRIx64
		    ", secsize %d, offset %" PRIu64
		    ", bcount %d\n",
		    vp, doread ? "read" : "write", obp->b_rawblkno,
		    vnd->sc_dkdev.dk_label->d_secsize, offset,
		    bp->b_bcount);
#endif

	/* Issue the read or write operation. */
	bp->b_error =
	    vn_rdwr(doread ? UIO_READ : UIO_WRITE,
	    vp, bp->b_data, len, offset, UIO_SYSSPACE,
	    IO_ADV_ENCODE(POSIX_FADV_NOREUSE) | IO_DIRECT,
	    vnd->sc_cred, &resid, NULL);
	bp->b_resid = resid;

	/*
	 * Avoid caching too many pages, the vnd user
	 * is usually a filesystem and caches itself.
	 * We need some amount of caching to not hinder
	 * read-ahead and write-behind operations.
	 */
	mutex_enter(vp->v_interlock);
	if (vp->v_uobj.uo_npages > VND_MAXPAGES(vnd))
		(void) VOP_PUTPAGES(vp, 0, 0,
		    PGO_ALLPAGES | PGO_CLEANIT | PGO_FREE);
	else
		mutex_exit(vp->v_interlock);

	/* We need to increase the number of outputs on the vnode if
	 * there was any write to it. */
	if (!doread) {
		mutex_enter(vp->v_interlock);
		vp->v_numoutput++;
		mutex_exit(vp->v_interlock);
	}

	biodone(bp);
}

/*
 * Handles the read/write request given in 'bp' using the vnode's VOP_BMAP
 * and VOP_STRATEGY operations.
 *
 * 'obp' is a pointer to the original request fed to the vnd device.
 */
static void
handle_with_strategy(struct vnd_softc *vnd, const struct buf *obp,
    struct buf *bp)
{
	int bsize, error, flags, skipped;
	size_t resid, sz;
	off_t bn, offset;
	struct vnode *vp;
	struct buf *nbp = NULL;

	flags = obp->b_flags;


	/* convert to a byte offset within the file. */
	bn = obp->b_rawblkno * vnd->sc_dkdev.dk_label->d_secsize;

	bsize = vnd->sc_vp->v_mount->mnt_stat.f_iosize;
	skipped = 0;

	/*
	 * Break the request into bsize pieces and feed them
	 * sequentially using VOP_BMAP/VOP_STRATEGY.
	 * We do it this way to keep from flooding NFS servers if we
	 * are connected to an NFS file.  This places the burden on
	 * the client rather than the server.
	 */
	error = 0;
	bp->b_resid = bp->b_bcount;
	for (offset = 0, resid = bp->b_resid; /* true */;
	    resid -= sz, offset += sz) {
		daddr_t nbn;
		int off, nra;

		nra = 0;
		vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY);
		error = VOP_BMAP(vnd->sc_vp, bn / bsize, &vp, &nbn, &nra);
		VOP_UNLOCK(vnd->sc_vp);

		if (error == 0 && (long)nbn == -1)
			error = EIO;

		/*
		 * If there was an error or a hole in the file...punt.
		 * Note that we may have to wait for any operations
		 * that we have already fired off before releasing
		 * the buffer.
		 *
		 * XXX we could deal with holes here but it would be
		 * a hassle (in the write case).
		 */
		if (error) {
			skipped += resid;
			break;
		}

#ifdef DEBUG
		if (!dovndcluster)
			nra = 0;
#endif

		off = bn % bsize;
		sz = MIN(((off_t)1 + nra) * bsize - off, resid);
#ifdef DEBUG
		if (vnddebug & VDB_IO)
			printf("vndstrategy: vp %p/%p bn 0x%qx/0x%" PRIx64
			    " sz 0x%zx\n", vnd->sc_vp, vp, (long long)bn,
			    nbn, sz);
#endif

		nbp = getiobuf(vp, true);
		nestiobuf_setup(bp, nbp, offset, sz);
		nbp->b_blkno = nbn + btodb(off);

#if 0 /* XXX #ifdef DEBUG */
		if (vnddebug & VDB_IO)
			printf("vndstart(%ld): bp %p vp %p blkno "
			    "0x%" PRIx64 " flags %x addr %p cnt 0x%x\n",
			    (long) (vnd-vnd_softc), &nbp->vb_buf,
			    nbp->vb_buf.b_vp, nbp->vb_buf.b_blkno,
			    nbp->vb_buf.b_flags, nbp->vb_buf.b_data,
			    nbp->vb_buf.b_bcount);
#endif
		if (resid == sz) {
			break;
		}
		VOP_STRATEGY(vp, nbp);
		bn += sz;
	}
	if (!(flags & B_READ)) {
		struct vnode *w_vp;
		/*
		 * this is the last nested buf, account for
		 * the parent buf write too.
		 * This has to be done last, so that
		 * fsync won't wait for this write which
		 * has no chance to complete before all nested bufs
		 * have been queued.  But it has to be done
		 * before the last VOP_STRATEGY()
		 * or the call to nestiobuf_done().
		 */
		w_vp = bp->b_vp;
		mutex_enter(w_vp->v_interlock);
		w_vp->v_numoutput++;
		mutex_exit(w_vp->v_interlock);
	}
	KASSERT(skipped != 0 || nbp != NULL);
	if (skipped)
		nestiobuf_done(bp, skipped, error);
	else
		VOP_STRATEGY(vp, nbp);
}
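
/*
 * Completion handler for both I/O paths: propagate the result to the
 * original buffer, release the transfer header and wake up anyone
 * throttled in the worker thread on sc_active.
 */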
static void
vndiodone(struct buf *bp)
{
	struct vndxfer *vnx = VND_BUFTOXFER(bp);
	struct vnd_softc *vnd = vnx->vx_vnd;
	struct buf *obp = bp->b_private;
	int s = splbio();

	KASSERT(&vnx->vx_buf == bp);
	KASSERT(vnd->sc_active > 0);
#ifdef DEBUG
	if (vnddebug & VDB_IO) {
		printf("vndiodone1: bp %p iodone: error %d\n",
		    bp, bp->b_error);
	}
#endif
	disk_unbusy(&vnd->sc_dkdev, bp->b_bcount - bp->b_resid,
	    (bp->b_flags & B_READ));
	vnd->sc_active--;
	if (vnd->sc_active == 0) {
		wakeup(&vnd->sc_tab);
	}
	splx(s);
	obp->b_error = bp->b_error;
	obp->b_resid = bp->b_resid;
	buf_destroy(bp);
	VND_PUTXFER(vnd, vnx);
	biodone(obp);
}

/* ARGSUSED */
static int
vndread(dev_t dev, struct uio *uio, int flags)
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndread(0x%"PRIx64", %p)\n", dev, uio);
#endif

	sc = device_lookup_private(&vnd_cd, unit);
	if (sc == NULL)
		return ENXIO;

	if ((sc->sc_flags & VNF_INITED) == 0)
		return ENXIO;

	return physio(vndstrategy, NULL, dev, B_READ, minphys, uio);
}

/* ARGSUSED */
static int
vndwrite(dev_t dev, struct uio *uio, int flags)
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndwrite(0x%"PRIx64", %p)\n", dev, uio);
#endif

	sc = device_lookup_private(&vnd_cd, unit);
	if (sc == NULL)
		return ENXIO;

	if ((sc->sc_flags & VNF_INITED) == 0)
		return ENXIO;

	return physio(vndstrategy, NULL, dev, B_WRITE, minphys, uio);
}

static int
vnd_cget(struct lwp *l, int unit, int *un, struct vattr *va)
{
	int error;
	struct vnd_softc *vnd;

	if (*un == -1)
		*un = unit;
	if (*un < 0)
		return EINVAL;

	vnd = device_lookup_private(&vnd_cd, *un);
	if (vnd == NULL)
		return -1;

	if ((vnd->sc_flags & VNF_INITED) == 0)
		return -1;

	vn_lock(vnd->sc_vp, LK_SHARED | LK_RETRY);
	error = VOP_GETATTR(vnd->sc_vp, va, l->l_cred);
	VOP_UNLOCK(vnd->sc_vp);
	return error;
}
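
/*
 * Unconfigure the unit: refuse if it is still busy (unless forced),
 * delete its wedges, tear the worker thread down via vndclear() and
 * detach the disk.
 */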
static int
vnddoclear(struct vnd_softc *vnd, int pmask, int minor, bool force)
{
	int error;

	if ((error = vndlock(vnd)) != 0)
		return error;

	/*
	 * Don't unconfigure if any other partitions are open
	 * or if both the character and block flavors of this
	 * partition are open.
	 */
	if (DK_BUSY(vnd, pmask) && !force) {
		vndunlock(vnd);
		return EBUSY;
	}

	/* Delete all of our wedges */
	dkwedge_delall(&vnd->sc_dkdev);

	/*
	 * XXX vndclear() might call vndclose() implicitly;
	 * release lock to avoid recursion
	 *
	 * Set VNF_CLEARING to prevent vndopen() from
	 * sneaking in after we vndunlock().
	 */
	vnd->sc_flags |= VNF_CLEARING;
	vndunlock(vnd);
	vndclear(vnd, minor);
#ifdef DEBUG
	if (vnddebug & VDB_INIT)
		printf("%s: CLRed\n", __func__);
#endif

	/* Destroy the xfer and buffer pools. */
	pool_destroy(&vnd->sc_vxpool);

	/* Detach the disk. */
	disk_detach(&vnd->sc_dkdev);

	return 0;
}

static int
vndioctl_get(struct lwp *l, void *data, int unit, struct vattr *va)
{
	int error;

	KASSERT(l);

	/* the first member is always int vnd_unit in all the versions */
	if (*(int *)data >= vnd_cd.cd_ndevs)
		return ENXIO;

	switch (error = vnd_cget(l, unit, (int *)data, va)) {
	case -1:
		/* unused is not an error */
		memset(va, 0, sizeof(*va));
		/*FALLTHROUGH*/
	case 0:
		return 0;
	default:
		return error;
	}
}
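
/*
 * Ioctl entry point.  VNDIOCGET and the compat hooks are handled first;
 * everything else is validated (write access, unit initialized) and
 * then dispatched through the switch below.
 */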
/* ARGSUSED */
static int
vndioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
{
	bool force;
	int unit = vndunit(dev);
	struct vnd_softc *vnd;
	struct vnd_ioctl *vio;
	struct vattr vattr;
	struct pathbuf *pb;
	struct nameidata nd;
	int error, part, pmask;
	uint64_t geomsize;
	int fflags;
#ifdef __HAVE_OLD_DISKLABEL
	struct disklabel newlabel;
#endif

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndioctl(0x%"PRIx64", 0x%lx, %p, 0x%x, %p): unit %d\n",
		    dev, cmd, data, flag, l->l_proc, unit);
#endif
	/* Do the gets first; they don't need initialization or verification */
	switch (cmd) {
	case VNDIOCGET:
		if ((error = vndioctl_get(l, data, unit, &vattr)) != 0)
			return error;

		struct vnd_user *vnu = data;
		vnu->vnu_dev = vattr.va_fsid;
		vnu->vnu_ino = vattr.va_fileid;
		return 0;

	default:
		/* First check for COMPAT_50 hook */
		MODULE_HOOK_CALL(compat_vndioctl_50_hook,
		    (cmd, l, data, unit, &vattr, vndioctl_get),
		    enosys(), error);

		/*
		 * If not present, then COMPAT_30 hook also not
		 * present, so just continue with checks for the
		 * "write" commands
		 */
		if (error == ENOSYS) {
			error = 0;
			break;
		}

		/* If not already handled, try the COMPAT_30 hook */
		if (error == EPASSTHROUGH)
			MODULE_HOOK_CALL(compat_vndioctl_30_hook,
			    (cmd, l, data, unit, &vattr, vndioctl_get),
			    enosys(), error);

		/* If no COMPAT_30 module, or not handled, check writes */
		if (error == ENOSYS || error == EPASSTHROUGH) {
			error = 0;
			break;
		}
		return error;
	}

	vnd = device_lookup_private(&vnd_cd, unit);
	if (vnd == NULL)
		return ENXIO;
	vio = (struct vnd_ioctl *)data;

	/* Must be open for writes for these commands... */
	switch (cmd) {
	case VNDIOCSET50:
	case VNDIOCCLR50:
		if (!compat_vndioctl_50_hook.hooked)
			return EINVAL;
		/* FALLTHROUGH */
	case VNDIOCSET:
	case VNDIOCCLR:
	case DIOCSDINFO:
	case DIOCWDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCSDINFO:
	case ODIOCWDINFO:
#endif
	case DIOCKLABEL:
	case DIOCWLABEL:
		if ((flag & FWRITE) == 0)
			return EBADF;
	}

	/* Must be initialized for these... */
	switch (cmd) {
	case VNDIOCCLR:
	case VNDIOCCLR50:
	case DIOCGDINFO:
	case DIOCSDINFO:
	case DIOCWDINFO:
	case DIOCGPARTINFO:
	case DIOCKLABEL:
	case DIOCWLABEL:
	case DIOCGDEFLABEL:
	case DIOCCACHESYNC:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDINFO:
	case ODIOCSDINFO:
	case ODIOCWDINFO:
	case ODIOCGDEFLABEL:
#endif
		if ((vnd->sc_flags & VNF_INITED) == 0)
			return ENXIO;
	}

	error = disk_ioctl(&vnd->sc_dkdev, dev, cmd, data, flag, l);
	if (error != EPASSTHROUGH)
		return error;


	switch (cmd) {
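	/*
	 * Configure the unit: open the backing file, derive or check the
	 * geometry, set up optional zlib decompression, establish root
	 * credentials and start the worker thread.
	 */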
	case VNDIOCSET50:
	case VNDIOCSET:
		if (vnd->sc_flags & VNF_INITED)
			return EBUSY;

		if ((error = vndlock(vnd)) != 0)
			return error;

		fflags = FREAD;
		if ((vio->vnd_flags & VNDIOF_READONLY) == 0)
			fflags |= FWRITE;
		if ((vio->vnd_flags & VNDIOF_FILEIO) != 0)
			vnd->sc_flags |= VNF_USE_VN_RDWR;
		error = pathbuf_copyin(vio->vnd_file, &pb);
		if (error) {
			goto unlock_and_exit;
		}
		NDINIT(&nd, LOOKUP, FOLLOW, pb);
		if ((error = vn_open(&nd, fflags, 0)) != 0) {
			pathbuf_destroy(pb);
			goto unlock_and_exit;
		}
		KASSERT(l);
		error = VOP_GETATTR(nd.ni_vp, &vattr, l->l_cred);
		if (!error && nd.ni_vp->v_type != VREG)
			error = EOPNOTSUPP;
		if (!error && vattr.va_bytes < vattr.va_size)
			/* File is definitely sparse, use vn_rdwr() */
			vnd->sc_flags |= VNF_USE_VN_RDWR;
		if (error) {
			VOP_UNLOCK(nd.ni_vp);
			goto close_and_exit;
		}

		/* If using a compressed file, initialize its info */
		/* (or abort with an error if kernel has no compression) */
		if (vio->vnd_flags & VNDIOF_COMP) {
#ifdef VND_COMPRESSION
			struct vnd_comp_header *ch;
			int i;
			uint32_t comp_size;
			uint32_t comp_maxsize;

			/* allocate space for compressed file header */
			ch = malloc(sizeof(struct vnd_comp_header),
			    M_TEMP, M_WAITOK);

			/* read compressed file header */
			error = vn_rdwr(UIO_READ, nd.ni_vp, (void *)ch,
			    sizeof(struct vnd_comp_header), 0, UIO_SYSSPACE,
			    IO_UNIT|IO_NODELOCKED, l->l_cred, NULL, NULL);
			if (error) {
				free(ch, M_TEMP);
				VOP_UNLOCK(nd.ni_vp);
				goto close_and_exit;
			}

			if (be32toh(ch->block_size) == 0 ||
			    be32toh(ch->num_blocks) > UINT32_MAX - 1) {
				free(ch, M_TEMP);
				VOP_UNLOCK(nd.ni_vp);
				goto close_and_exit;
			}

			/* save some header info */
			vnd->sc_comp_blksz = be32toh(ch->block_size);
			/* note last offset is the file byte size */
			vnd->sc_comp_numoffs = be32toh(ch->num_blocks) + 1;
			free(ch, M_TEMP);
			if (!DK_DEV_BSIZE_OK(vnd->sc_comp_blksz)) {
				VOP_UNLOCK(nd.ni_vp);
				error = EINVAL;
				goto close_and_exit;
			}
			KASSERT(0 < vnd->sc_comp_blksz);
			KASSERT(0 < vnd->sc_comp_numoffs);
			/*
			 * @#^@!$& gcc -Wtype-limits refuses to let me
			 * write SIZE_MAX/sizeof(uint64_t) < numoffs,
			 * because the range of the type on amd64 makes
			 * the comparisons always false.
			 */
#if SIZE_MAX <= UINT32_MAX*(64/CHAR_BIT)
			if (SIZE_MAX/sizeof(uint64_t) < vnd->sc_comp_numoffs) {
				VOP_UNLOCK(nd.ni_vp);
				error = EINVAL;
				goto close_and_exit;
			}
#endif
			if ((vattr.va_size < sizeof(struct vnd_comp_header)) ||
			    (vattr.va_size - sizeof(struct vnd_comp_header) <
			    sizeof(uint64_t)*vnd->sc_comp_numoffs) ||
			    (UQUAD_MAX/vnd->sc_comp_blksz <
			    vnd->sc_comp_numoffs - 1)) {
				VOP_UNLOCK(nd.ni_vp);
				error = EINVAL;
				goto close_and_exit;
			}

			/* set decompressed file size */
			KASSERT(vnd->sc_comp_numoffs - 1 <=
			    UQUAD_MAX/vnd->sc_comp_blksz);
			vattr.va_size =
			    ((u_quad_t)vnd->sc_comp_numoffs - 1) *
			    (u_quad_t)vnd->sc_comp_blksz;

			/* allocate space for all the compressed offsets */
			__CTASSERT(UINT32_MAX <= UQUAD_MAX/sizeof(uint64_t));
			vnd->sc_comp_offsets =
			    malloc(sizeof(uint64_t) * vnd->sc_comp_numoffs,
			    M_DEVBUF, M_WAITOK);

			/* read in the offsets */
			error = vn_rdwr(UIO_READ, nd.ni_vp,
			    (void *)vnd->sc_comp_offsets,
			    sizeof(uint64_t) * vnd->sc_comp_numoffs,
			    sizeof(struct vnd_comp_header), UIO_SYSSPACE,
			    IO_UNIT|IO_NODELOCKED, l->l_cred, NULL, NULL);
			if (error) {
				VOP_UNLOCK(nd.ni_vp);
				goto close_and_exit;
			}
			/*
			 * find largest block size (used for allocation limit).
			 * Also convert offset to native byte order.
			 */
			comp_maxsize = 0;
			for (i = 0; i < vnd->sc_comp_numoffs - 1; i++) {
				vnd->sc_comp_offsets[i] =
				    be64toh(vnd->sc_comp_offsets[i]);
				comp_size =
				    be64toh(vnd->sc_comp_offsets[i + 1])
				    - vnd->sc_comp_offsets[i];
				if (comp_size > comp_maxsize)
					comp_maxsize = comp_size;
			}
			vnd->sc_comp_offsets[vnd->sc_comp_numoffs - 1] =
			    be64toh(vnd->sc_comp_offsets[vnd->sc_comp_numoffs
			    - 1]);

			/* create compressed data buffer */
			vnd->sc_comp_buff = malloc(comp_maxsize,
			    M_DEVBUF, M_WAITOK);

			/* create decompressed buffer */
			vnd->sc_comp_decombuf = malloc(vnd->sc_comp_blksz,
			    M_DEVBUF, M_WAITOK);
			vnd->sc_comp_buffblk = -1;

			/* Initialize decompress stream */
			memset(&vnd->sc_comp_stream, 0, sizeof(z_stream));
			vnd->sc_comp_stream.zalloc = vnd_alloc;
			vnd->sc_comp_stream.zfree = vnd_free;
			error = inflateInit2(&vnd->sc_comp_stream, MAX_WBITS);
			if (error) {
				if (vnd->sc_comp_stream.msg)
					printf("vnd%d: compressed file, %s\n",
					    unit, vnd->sc_comp_stream.msg);
				VOP_UNLOCK(nd.ni_vp);
				error = EINVAL;
				goto close_and_exit;
			}

			vnd->sc_flags |= VNF_COMP | VNF_READONLY;
#else /* !VND_COMPRESSION */
			VOP_UNLOCK(nd.ni_vp);
			error = EOPNOTSUPP;
			goto close_and_exit;
#endif /* VND_COMPRESSION */
		}

		VOP_UNLOCK(nd.ni_vp);
		vnd->sc_vp = nd.ni_vp;
		vnd->sc_size = btodb(vattr.va_size);	/* note truncation */

		/* get smallest I/O size for underlying device, fall back to
		 * fundamental I/O size of underlying filesystem
		 */
		error = bdev_ioctl(vattr.va_fsid, DIOCGSECTORSIZE,
		    &vnd->sc_iosize, FKIOCTL, l);
		if (error)
			vnd->sc_iosize = vnd->sc_vp->v_mount->mnt_stat.f_frsize;

		/*
		 * Use the pseudo-geometry specified.  If none was provided,
		 * use "standard" Adaptec fictitious geometry.
		 */
		if (vio->vnd_flags & VNDIOF_HASGEOM) {

			memcpy(&vnd->sc_geom, &vio->vnd_geom,
			    sizeof(vio->vnd_geom));

			/*
			 * Sanity-check the sector size.
			 */
			if (!DK_DEV_BSIZE_OK(vnd->sc_geom.vng_secsize) ||
			    vnd->sc_geom.vng_ntracks == 0 ||
			    vnd->sc_geom.vng_nsectors == 0) {
				error = EINVAL;
				goto close_and_exit;
			}

			/*
			 * Compute missing cylinder count from size
			 */
			if (vnd->sc_geom.vng_ncylinders == 0)
				vnd->sc_geom.vng_ncylinders = vnd->sc_size /
				    (vnd->sc_geom.vng_ntracks *
				    vnd->sc_geom.vng_nsectors);

			/*
			 * Compute the size (in DEV_BSIZE blocks) specified
			 * by the geometry.
			 */
			geomsize = (int64_t)vnd->sc_geom.vng_nsectors *
			    vnd->sc_geom.vng_ntracks *
			    vnd->sc_geom.vng_ncylinders *
			    (vnd->sc_geom.vng_secsize / DEV_BSIZE);

			/*
			 * Sanity-check the size against the specified
			 * geometry.
			 */
			if (vnd->sc_size < geomsize) {
				error = EINVAL;
				goto close_and_exit;
			}
		} else if (vnd->sc_size >= (32 * 64)) {
			/*
			 * Size must be at least 2048 DEV_BSIZE blocks
			 * (1M) in order to use this geometry.
			 */
			vnd->sc_geom.vng_secsize = DEV_BSIZE;
			vnd->sc_geom.vng_nsectors = 32;
			vnd->sc_geom.vng_ntracks = 64;
			vnd->sc_geom.vng_ncylinders = vnd->sc_size / (64 * 32);
		} else {
			vnd->sc_geom.vng_secsize = DEV_BSIZE;
			vnd->sc_geom.vng_nsectors = 1;
			vnd->sc_geom.vng_ntracks = 1;
			vnd->sc_geom.vng_ncylinders = vnd->sc_size;
		}

		vnd_set_geometry(vnd);

		if (vio->vnd_flags & VNDIOF_READONLY) {
			vnd->sc_flags |= VNF_READONLY;
		}

		if ((error = vndsetcred(vnd, l->l_cred)) != 0)
			goto close_and_exit;

		vndthrottle(vnd, vnd->sc_vp);
		vio->vnd_osize = dbtob(vnd->sc_size);
		if (cmd != VNDIOCSET50)
			vio->vnd_size = dbtob(vnd->sc_size);
		vnd->sc_flags |= VNF_INITED;

		/* create the kernel thread, wait for it to be up */
		error = kthread_create(PRI_NONE, 0, NULL, vndthread, vnd,
		    &vnd->sc_kthread, "%s", device_xname(vnd->sc_dev));
		if (error)
			goto close_and_exit;
		while ((vnd->sc_flags & VNF_KTHREAD) == 0) {
			tsleep(&vnd->sc_kthread, PRIBIO, "vndthr", 0);
		}
#ifdef DEBUG
		if (vnddebug & VDB_INIT)
			printf("vndioctl: SET vp %p size 0x%lx %d/%d/%d/%d\n",
			    vnd->sc_vp, (unsigned long) vnd->sc_size,
			    vnd->sc_geom.vng_secsize,
			    vnd->sc_geom.vng_nsectors,
			    vnd->sc_geom.vng_ntracks,
			    vnd->sc_geom.vng_ncylinders);
#endif

		/* Attach the disk. */
		disk_attach(&vnd->sc_dkdev);

		/* Initialize the xfer and buffer pools. */
		pool_init(&vnd->sc_vxpool, sizeof(struct vndxfer), 0,
		    0, 0, "vndxpl", NULL, IPL_BIO);

		vndunlock(vnd);

		pathbuf_destroy(pb);

		/* Discover wedges on this disk */
		dkwedge_discover(&vnd->sc_dkdev);

		break;

close_and_exit:
		(void) vn_close(nd.ni_vp, fflags, l->l_cred);
		pathbuf_destroy(pb);
unlock_and_exit:
#ifdef VND_COMPRESSION
		/* free any allocated memory (for compressed file) */
		if (vnd->sc_comp_offsets) {
			free(vnd->sc_comp_offsets, M_DEVBUF);
			vnd->sc_comp_offsets = NULL;
		}
		if (vnd->sc_comp_buff) {
			free(vnd->sc_comp_buff, M_DEVBUF);
			vnd->sc_comp_buff = NULL;
		}
		if (vnd->sc_comp_decombuf) {
			free(vnd->sc_comp_decombuf, M_DEVBUF);
			vnd->sc_comp_decombuf = NULL;
		}
#endif /* VND_COMPRESSION */
		vndunlock(vnd);
		return error;

	case VNDIOCCLR50:
	case VNDIOCCLR:
		part = DISKPART(dev);
		pmask = (1 << part);
		force = (vio->vnd_flags & VNDIOF_FORCE) != 0;

		if ((error = vnddoclear(vnd, pmask, minor(dev), force)) != 0)
			return error;

		break;


	case DIOCWDINFO:
	case DIOCSDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCWDINFO:
	case ODIOCSDINFO:
#endif
	{
		struct disklabel *lp;

		if ((error = vndlock(vnd)) != 0)
			return error;

		vnd->sc_flags |= VNF_LABELLING;

#ifdef __HAVE_OLD_DISKLABEL
		if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
			memset(&newlabel, 0, sizeof newlabel);
			memcpy(&newlabel, data, sizeof (struct olddisklabel));
			lp = &newlabel;
		} else
#endif
		lp = (struct disklabel *)data;

		error = setdisklabel(vnd->sc_dkdev.dk_label,
		    lp, 0, vnd->sc_dkdev.dk_cpulabel);
		if (error == 0) {
			if (cmd == DIOCWDINFO
#ifdef __HAVE_OLD_DISKLABEL
			    || cmd == ODIOCWDINFO
#endif
			   )
				error = writedisklabel(VNDLABELDEV(dev),
				    vndstrategy, vnd->sc_dkdev.dk_label,
				    vnd->sc_dkdev.dk_cpulabel);
		}

		vnd->sc_flags &= ~VNF_LABELLING;

		vndunlock(vnd);

		if (error)
			return error;
		break;
	}

	case DIOCKLABEL:
		if (*(int *)data != 0)
			vnd->sc_flags |= VNF_KLABEL;
		else
			vnd->sc_flags &= ~VNF_KLABEL;
		break;

	case DIOCWLABEL:
		if (*(int *)data != 0)
			vnd->sc_flags |= VNF_WLABEL;
		else
			vnd->sc_flags &= ~VNF_WLABEL;
		break;

	case DIOCGDEFLABEL:
		vndgetdefaultlabel(vnd, (struct disklabel *)data);
		break;

#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDEFLABEL:
		vndgetdefaultlabel(vnd, &newlabel);
		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
			return ENOTTY;
		memcpy(data, &newlabel, sizeof (struct olddisklabel));
		break;
#endif

	case DIOCCACHESYNC:
		vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY);
		error = VOP_FSYNC(vnd->sc_vp, vnd->sc_cred,
		    FSYNC_WAIT | FSYNC_DATAONLY | FSYNC_CACHE, 0, 0);
		VOP_UNLOCK(vnd->sc_vp);
		return error;

	default:
		return ENOTTY;
	}

	return 0;
}

/*
 * Duplicate the current process's credentials.  Since we are called only
 * as the result of a SET ioctl and only root can do that, any future access
 * to this "disk" is essentially as root.  Note that credentials may change
 * if some other uid can write directly to the mapped file (NFS).
 */
static int
vndsetcred(struct vnd_softc *vnd, kauth_cred_t cred)
{
	struct uio auio;
	struct iovec aiov;
	char *tmpbuf;
	int error;

	vnd->sc_cred = kauth_cred_dup(cred);
	tmpbuf = malloc(DEV_BSIZE, M_TEMP, M_WAITOK);

	/* XXX: Horrible kludge to establish credentials for NFS */
	aiov.iov_base = tmpbuf;
	aiov.iov_len = uimin(DEV_BSIZE, dbtob(vnd->sc_size));
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = 0;
	auio.uio_rw = UIO_READ;
	auio.uio_resid = aiov.iov_len;
	UIO_SETUP_SYSSPACE(&auio);
	vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY);
	error = VOP_READ(vnd->sc_vp, &auio, 0, vnd->sc_cred);
	if (error == 0) {
		/*
		 * Because vnd does all IO directly through the vnode
		 * we need to flush (at least) the buffer from the above
		 * VOP_READ from the buffer cache to prevent cache
		 * incoherencies.  Also, be careful to write dirty
		 * buffers back to stable storage.
		 */
		error = vinvalbuf(vnd->sc_vp, V_SAVE, vnd->sc_cred,
		    curlwp, 0, 0);
	}
	VOP_UNLOCK(vnd->sc_vp);

	free(tmpbuf, M_TEMP);
	return error;
}

/*
 * Set maxactive based on FS type
 */
static void
vndthrottle(struct vnd_softc *vnd, struct vnode *vp)
{

	if (vp->v_tag == VT_NFS)
		vnd->sc_maxactive = 2;
	else
		vnd->sc_maxactive = 8;

	if (vnd->sc_maxactive < 1)
		vnd->sc_maxactive = 1;
}

#if 0
static void
vndshutdown(void)
{
	struct vnd_softc *vnd;

	for (vnd = &vnd_softc[0]; vnd < &vnd_softc[numvnd]; vnd++)
		if (vnd->sc_flags & VNF_INITED)
			vndclear(vnd);
}
#endif
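
/*
 * Tear the unit down: revoke open vnodes, drain the queue, stop the
 * worker thread, release any compression state and close the backing
 * file.
 */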
static void
vndclear(struct vnd_softc *vnd, int myminor)
{
	struct vnode *vp = vnd->sc_vp;
	int fflags = FREAD;
	int bmaj, cmaj, i, mn;
	int s;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndclear(%p): vp %p\n", vnd, vp);
#endif
	/* locate the major number */
	bmaj = bdevsw_lookup_major(&vnd_bdevsw);
	cmaj = cdevsw_lookup_major(&vnd_cdevsw);

	/* Nuke the vnodes for any open instances */
	for (i = 0; i < MAXPARTITIONS; i++) {
		mn = DISKMINOR(device_unit(vnd->sc_dev), i);
		vdevgone(bmaj, mn, mn, VBLK);
		if (mn != myminor) /* XXX avoid killing our own vnode */
			vdevgone(cmaj, mn, mn, VCHR);
	}

	if ((vnd->sc_flags & VNF_READONLY) == 0)
		fflags |= FWRITE;

	s = splbio();
	bufq_drain(vnd->sc_tab);
	splx(s);

	vnd->sc_flags |= VNF_VUNCONF;
	wakeup(&vnd->sc_tab);
	while (vnd->sc_flags & VNF_KTHREAD)
		tsleep(&vnd->sc_kthread, PRIBIO, "vnthr", 0);

#ifdef VND_COMPRESSION
	/* free the compressed file buffers */
	if (vnd->sc_flags & VNF_COMP) {
		if (vnd->sc_comp_offsets) {
			free(vnd->sc_comp_offsets, M_DEVBUF);
			vnd->sc_comp_offsets = NULL;
		}
		if (vnd->sc_comp_buff) {
			free(vnd->sc_comp_buff, M_DEVBUF);
			vnd->sc_comp_buff = NULL;
		}
		if (vnd->sc_comp_decombuf) {
			free(vnd->sc_comp_decombuf, M_DEVBUF);
			vnd->sc_comp_decombuf = NULL;
		}
	}
#endif /* VND_COMPRESSION */
	vnd->sc_flags &=
	    ~(VNF_INITED | VNF_READONLY | VNF_KLABEL | VNF_VLABEL
	      | VNF_VUNCONF | VNF_COMP | VNF_CLEARING);
	if (vp == NULL)
		panic("vndclear: null vp");
	(void) vn_close(vp, fflags, vnd->sc_cred);
	kauth_cred_free(vnd->sc_cred);
	vnd->sc_vp = NULL;
	vnd->sc_cred = NULL;
	vnd->sc_size = 0;
}
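
/*
 * Return the size of the given partition in DEV_BSIZE blocks, for
 * swap/dump purposes; only FS_SWAP partitions qualify.
 */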
static int
vndsize(dev_t dev)
{
	struct vnd_softc *sc;
	struct disklabel *lp;
	int part, unit, omask;
	int size;

	unit = vndunit(dev);
	sc = device_lookup_private(&vnd_cd, unit);
	if (sc == NULL)
		return -1;

	if ((sc->sc_flags & VNF_INITED) == 0)
		return -1;

	part = DISKPART(dev);
	omask = sc->sc_dkdev.dk_openmask & (1 << part);
	lp = sc->sc_dkdev.dk_label;

	if (omask == 0 && vndopen(dev, 0, S_IFBLK, curlwp))	/* XXX */
		return -1;

	if (lp->d_partitions[part].p_fstype != FS_SWAP)
		size = -1;
	else
		size = lp->d_partitions[part].p_size *
		    (lp->d_secsize / DEV_BSIZE);

	if (omask == 0 && vndclose(dev, 0, S_IFBLK, curlwp))	/* XXX */
		return -1;

	return size;
}

static int
vnddump(dev_t dev, daddr_t blkno, void *va,
    size_t size)
{

	/* Not implemented. */
	return ENXIO;
}
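
/*
 * Build a default disklabel from the unit's pseudo-geometry: a single
 * raw partition covering the whole "disk".
 */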
static void
vndgetdefaultlabel(struct vnd_softc *sc, struct disklabel *lp)
{
	struct vndgeom *vng = &sc->sc_geom;
	struct partition *pp;
	unsigned spb;

	memset(lp, 0, sizeof(*lp));

	spb = vng->vng_secsize / DEV_BSIZE;
	if (sc->sc_size / spb > UINT32_MAX)
		lp->d_secperunit = UINT32_MAX;
	else
		lp->d_secperunit = sc->sc_size / spb;
	lp->d_secsize = vng->vng_secsize;
	lp->d_nsectors = vng->vng_nsectors;
	lp->d_ntracks = vng->vng_ntracks;
	lp->d_ncylinders = vng->vng_ncylinders;
	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;

	strncpy(lp->d_typename, "vnd", sizeof(lp->d_typename));
	lp->d_type = DKTYPE_VND;
	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
	lp->d_rpm = 3600;
	lp->d_interleave = 1;
	lp->d_flags = 0;

	pp = &lp->d_partitions[RAW_PART];
	pp->p_offset = 0;
	pp->p_size = lp->d_secperunit;
	pp->p_fstype = FS_UNUSED;
	lp->d_npartitions = RAW_PART + 1;

	lp->d_magic = DISKMAGIC;
	lp->d_magic2 = DISKMAGIC;
	lp->d_checksum = dkcksum(lp);
}

/*
 * Read the disklabel from a vnd.  If one is not present, create a fake one.
 */
static void
vndgetdisklabel(dev_t dev, struct vnd_softc *sc)
{
	const char *errstring;
	struct disklabel *lp = sc->sc_dkdev.dk_label;
	struct cpu_disklabel *clp = sc->sc_dkdev.dk_cpulabel;
	int i;

	memset(clp, 0, sizeof(*clp));

	vndgetdefaultlabel(sc, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(VNDLABELDEV(dev), vndstrategy, lp, clp);
	if (errstring) {
		/*
		 * Lack of disklabel is common, but we print the warning
		 * anyway, since it might contain other useful information.
		 */
		aprint_normal_dev(sc->sc_dev, "%s\n", errstring);

		/*
		 * For historical reasons, if there's no disklabel
		 * present, all partitions must be FS_BSDFFS and
		 * occupy the entire disk.
		 */
		for (i = 0; i < MAXPARTITIONS; i++) {
			/*
			 * Don't wipe out port-specific hacks (such as the
			 * DOS partition hack of the i386 port).
			 */
			if (lp->d_partitions[i].p_size != 0)
				continue;

			lp->d_partitions[i].p_size = lp->d_secperunit;
			lp->d_partitions[i].p_offset = 0;
			lp->d_partitions[i].p_fstype = FS_BSDFFS;
		}

		strncpy(lp->d_packname, "default label",
		    sizeof(lp->d_packname));

		lp->d_npartitions = MAXPARTITIONS;
		lp->d_checksum = dkcksum(lp);
	}
}

/*
 * Wait interruptibly for an exclusive lock.
 *
 * XXX
 * Several drivers do this; it should be abstracted and made MP-safe.
 */
static int
vndlock(struct vnd_softc *sc)
{
	int error;

	while ((sc->sc_flags & VNF_LOCKED) != 0) {
		sc->sc_flags |= VNF_WANTED;
		if ((error = tsleep(sc, PRIBIO | PCATCH, "vndlck", 0)) != 0)
			return error;
	}
	sc->sc_flags |= VNF_LOCKED;
	return 0;
}

/*
 * Unlock and wake up any waiters.
 */
static void
vndunlock(struct vnd_softc *sc)
{

	sc->sc_flags &= ~VNF_LOCKED;
	if ((sc->sc_flags & VNF_WANTED) != 0) {
		sc->sc_flags &= ~VNF_WANTED;
		wakeup(sc);
	}
}
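
/*
 * Compressed images consist of a vnd_comp_header, a big-endian table of
 * byte offsets (one per block, plus a final entry holding the file size)
 * and zlib-compressed blocks of sc_comp_blksz decompressed bytes each.
 * compstrategy() below serves reads by inflating the block that covers
 * the current offset, caching one decompressed block at a time.
 */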
#ifdef VND_COMPRESSION
/* compressed file read */
static void
compstrategy(struct buf *bp, off_t bn)
{
	int error;
	int unit = vndunit(bp->b_dev);
	struct vnd_softc *vnd =
	    device_lookup_private(&vnd_cd, unit);
	u_int32_t comp_block;
	struct uio auio;
	char *addr;
	int s;

	/* set up constants for data move */
	auio.uio_rw = UIO_READ;
	UIO_SETUP_SYSSPACE(&auio);

	/* read, and transfer the data */
	addr = bp->b_data;
	bp->b_resid = bp->b_bcount;
	s = splbio();
	while (bp->b_resid > 0) {
		unsigned length;
		size_t length_in_buffer;
		u_int32_t offset_in_buffer;
		struct iovec aiov;

		/* calculate the compressed block number */
		comp_block = bn / (off_t)vnd->sc_comp_blksz;

		/* check for good block number */
		if (comp_block >= vnd->sc_comp_numoffs) {
			bp->b_error = EINVAL;
			splx(s);
			return;
		}

		/* read in the compressed block, if not in buffer */
		if (comp_block != vnd->sc_comp_buffblk) {
			length = vnd->sc_comp_offsets[comp_block + 1] -
			    vnd->sc_comp_offsets[comp_block];
			vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY);
			error = vn_rdwr(UIO_READ, vnd->sc_vp, vnd->sc_comp_buff,
			    length, vnd->sc_comp_offsets[comp_block],
			    UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, vnd->sc_cred,
			    NULL, NULL);
			if (error) {
				bp->b_error = error;
				VOP_UNLOCK(vnd->sc_vp);
				splx(s);
				return;
			}
			/* uncompress the buffer */
			vnd->sc_comp_stream.next_in = vnd->sc_comp_buff;
			vnd->sc_comp_stream.avail_in = length;
			vnd->sc_comp_stream.next_out = vnd->sc_comp_decombuf;
			vnd->sc_comp_stream.avail_out = vnd->sc_comp_blksz;
			inflateReset(&vnd->sc_comp_stream);
			error = inflate(&vnd->sc_comp_stream, Z_FINISH);
			if (error != Z_STREAM_END) {
				if (vnd->sc_comp_stream.msg)
					aprint_normal_dev(vnd->sc_dev,
					    "compressed file, %s\n",
					    vnd->sc_comp_stream.msg);
				bp->b_error = EBADMSG;
				VOP_UNLOCK(vnd->sc_vp);
				splx(s);
				return;
			}
			vnd->sc_comp_buffblk = comp_block;
			VOP_UNLOCK(vnd->sc_vp);
		}

		/* transfer the usable uncompressed data */
		offset_in_buffer = bn % (off_t)vnd->sc_comp_blksz;
		length_in_buffer = vnd->sc_comp_blksz - offset_in_buffer;
		if (length_in_buffer > bp->b_resid)
			length_in_buffer = bp->b_resid;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		aiov.iov_base = addr;
		aiov.iov_len = length_in_buffer;
		auio.uio_resid = aiov.iov_len;
		auio.uio_offset = 0;
		error = uiomove(vnd->sc_comp_decombuf + offset_in_buffer,
		    length_in_buffer, &auio);
		if (error) {
			bp->b_error = error;
			splx(s);
			return;
		}

		bn += length_in_buffer;
		addr += length_in_buffer;
		bp->b_resid -= length_in_buffer;
	}
	splx(s);
}

/* compression memory allocation routines */
static void *
vnd_alloc(void *aux, u_int items, u_int siz)
{
	return malloc(items * siz, M_TEMP, M_NOWAIT);
}

static void
vnd_free(void *aux, void *ptr)
{
	free(ptr, M_TEMP);
}
#endif /* VND_COMPRESSION */
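
/*
 * Publish the unit's pseudo-geometry to the generic disk subsystem.
 */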
static void
vnd_set_geometry(struct vnd_softc *vnd)
{
	struct disk_geom *dg = &vnd->sc_dkdev.dk_geom;

	memset(dg, 0, sizeof(*dg));

	dg->dg_secperunit = (int64_t)vnd->sc_geom.vng_nsectors *
	    vnd->sc_geom.vng_ntracks * vnd->sc_geom.vng_ncylinders;
	dg->dg_secsize = vnd->sc_geom.vng_secsize;
	dg->dg_nsectors = vnd->sc_geom.vng_nsectors;
	dg->dg_ntracks = vnd->sc_geom.vng_ntracks;
	dg->dg_ncylinders = vnd->sc_geom.vng_ncylinders;

#ifdef DEBUG
	if (vnddebug & VDB_LABEL) {
		printf("dg->dg_secperunit: %" PRId64 "\n", dg->dg_secperunit);
		printf("dg->dg_ncylinders: %u\n", dg->dg_ncylinders);
	}
#endif
	disk_set_info(vnd->sc_dev, &vnd->sc_dkdev, NULL);
}

#ifdef VND_COMPRESSION
#define VND_DEPENDS "zlib"
#else
#define VND_DEPENDS NULL
#endif

MODULE(MODULE_CLASS_DRIVER, vnd, VND_DEPENDS);

#ifdef _MODULE
int vnd_bmajor = -1, vnd_cmajor = -1;

CFDRIVER_DECL(vnd, DV_DISK, NULL);
#endif
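
/*
 * Module control: register the cfdriver, cfattach and devsw entries on
 * load, and unwind them in reverse order on unload, restoring whatever
 * was already registered if a later step fails.
 */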
static int
vnd_modcmd(modcmd_t cmd, void *arg)
{
	int error = 0;

	switch (cmd) {
	case MODULE_CMD_INIT:
#ifdef _MODULE
		error = config_cfdriver_attach(&vnd_cd);
		if (error)
			break;

		error = config_cfattach_attach(vnd_cd.cd_name, &vnd_ca);
		if (error) {
			config_cfdriver_detach(&vnd_cd);
#ifdef DIAGNOSTIC
			aprint_error("%s: unable to register cfattach for "
			    "%s, error %d\n", __func__, vnd_cd.cd_name, error);
#endif
			break;
		}

		/*
		 * Attach the {b,c}devsw's
		 */
		error = devsw_attach("vnd", &vnd_bdevsw, &vnd_bmajor,
		    &vnd_cdevsw, &vnd_cmajor);
		/*
		 * If devsw_attach fails, remove from autoconf database
		 */
		if (error) {
			config_cfattach_detach(vnd_cd.cd_name, &vnd_ca);
			config_cfdriver_detach(&vnd_cd);
#ifdef DIAGNOSTIC
			aprint_error("%s: unable to attach %s devsw, "
			    "error %d\n", __func__, vnd_cd.cd_name, error);
#endif
			break;
		}
#endif
		break;

	case MODULE_CMD_FINI:
#ifdef _MODULE
		/*
		 * Remove {b,c}devsw's
		 */
		devsw_detach(&vnd_bdevsw, &vnd_cdevsw);

		/*
		 * Now remove device from autoconf database
		 */
		error = config_cfattach_detach(vnd_cd.cd_name, &vnd_ca);
		if (error) {
			(void)devsw_attach("vnd", &vnd_bdevsw, &vnd_bmajor,
			    &vnd_cdevsw, &vnd_cmajor);
#ifdef DIAGNOSTIC
			aprint_error("%s: failed to detach %s cfattach, "
			    "error %d\n", __func__, vnd_cd.cd_name, error);
#endif
			break;
		}
		error = config_cfdriver_detach(&vnd_cd);
		if (error) {
			(void)config_cfattach_attach(vnd_cd.cd_name, &vnd_ca);
			(void)devsw_attach("vnd", &vnd_bdevsw, &vnd_bmajor,
			    &vnd_cdevsw, &vnd_cmajor);
#ifdef DIAGNOSTIC
			aprint_error("%s: failed to detach %s cfdriver, "
			    "error %d\n", __func__, vnd_cd.cd_name, error);
#endif
			break;
		}
#endif
		break;

	case MODULE_CMD_STAT:
		return ENOTTY;

	default:
		return ENOTTY;
	}

	return error;
}