/*	$NetBSD: vnd.c,v 1.283 2021/07/24 21:31:36 andvar Exp $	*/

/*-
 * Copyright (c) 1996, 1997, 1998, 2008, 2020 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vn.c 1.13 94/04/02$
 *
 *	@(#)vn.c	8.9 (Berkeley) 5/14/95
 */

/*
 * Vnode disk driver.
 *
 * Block/character interface to a vnode.  Allows one to treat a file
 * as a disk (e.g. build a filesystem in it, mount it, etc.).
 *
 * NOTE 1: If the vnode supports the VOP_BMAP and VOP_STRATEGY operations,
 * this uses them to avoid distorting the local buffer cache.  If those
 * block-level operations are not available, this falls back to the regular
 * read and write calls.  Using these may distort the cache in some cases,
 * but it is better to have the driver work than to prevent it from working
 * on file systems where the block-level operations are not implemented for
 * whatever reason.
 *
 * NOTE 2: There is a security issue involved with this driver.
 * Once mounted, all access to the contents of the "mapped" file via
 * the special file is controlled by the permissions on the special
 * file; the protection of the mapped file is ignored (effectively,
 * by using root credentials in all transactions).
 *
 * NOTE 3: Doesn't interact with leases, should it?
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vnd.c,v 1.283 2021/07/24 21:31:36 andvar Exp $");

#if defined(_KERNEL_OPT)
#include "opt_vnd.h"
#include "opt_compat_netbsd.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/proc.h>
#include <sys/kthread.h>
#include <sys/errno.h>
#include <sys/buf.h>
#include <sys/bufq.h>
#include <sys/malloc.h>
#include <sys/ioctl.h>
#include <sys/disklabel.h>
#include <sys/device.h>
#include <sys/disk.h>
#include <sys/stat.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/fstrans.h>
#include <sys/file.h>
#include <sys/uio.h>
#include <sys/conf.h>
#include <sys/kauth.h>
#include <sys/module.h>
#include <sys/compat_stub.h>
#include <sys/atomic.h>

#include <net/zlib.h>

#include <miscfs/genfs/genfs.h>
#include <miscfs/specfs/specdev.h>

#include <dev/dkvar.h>
#include <dev/vndvar.h>

#include "ioconf.h"

#if defined(VNDDEBUG) && !defined(DEBUG)
#define	DEBUG
#endif

#ifdef DEBUG
int dovndcluster = 1;
#define	VDB_FOLLOW	0x01
#define	VDB_INIT	0x02
#define	VDB_IO		0x04
#define	VDB_LABEL	0x08
int vnddebug = 0;
#endif

#define	vndunit(x)	DISKUNIT(x)

struct vndxfer {
	struct buf vx_buf;
	struct vnd_softc *vx_vnd;
};
#define	VND_BUFTOXFER(bp)	((struct vndxfer *)(void *)bp)

#define	VND_GETXFER(vnd)	pool_get(&(vnd)->sc_vxpool, PR_WAITOK)
#define	VND_PUTXFER(vnd, vx)	pool_put(&(vnd)->sc_vxpool, (vx))

#define	VNDLABELDEV(dev) \
    (MAKEDISKDEV(major((dev)), vndunit((dev)), RAW_PART))

#define	VND_MAXPENDING(vnd)	((vnd)->sc_maxactive * 4)
#define	VND_MAXPAGES(vnd)	(1024 * 1024 / PAGE_SIZE)


static void	vndclear(struct vnd_softc *, int);
static int	vnddoclear(struct vnd_softc *, int, int, bool);
static int	vndsetcred(struct vnd_softc *, kauth_cred_t);
static void	vndthrottle(struct vnd_softc *, struct vnode *);
static void	vndiodone(struct buf *);
#if 0
static void	vndshutdown(void);
#endif

static void	vndgetdefaultlabel(struct vnd_softc *, struct disklabel *);
static void	vndgetdisklabel(dev_t, struct vnd_softc *);

static int	vndlock(struct vnd_softc *);
static void	vndunlock(struct vnd_softc *);
#ifdef VND_COMPRESSION
static void	compstrategy(struct buf *, off_t);
static void	*vnd_alloc(void *, u_int, u_int);
static void	vnd_free(void *, void *);
#endif /* VND_COMPRESSION */

static void	vndthread(void *);
static bool	vnode_has_op(const struct vnode *, int);
static void	handle_with_rdwr(struct vnd_softc *, const struct buf *,
		    struct buf *);
static void	handle_with_strategy(struct vnd_softc *, const struct buf *,
		    struct buf *);
static void	vnd_set_geometry(struct vnd_softc *);

static dev_type_open(vndopen);
static dev_type_close(vndclose);
static dev_type_read(vndread);
static dev_type_write(vndwrite);
static dev_type_ioctl(vndioctl);
static dev_type_strategy(vndstrategy);
static dev_type_dump(vnddump);
static dev_type_size(vndsize);

const struct bdevsw vnd_bdevsw = {
	.d_open = vndopen,
	.d_close = vndclose,
	.d_strategy = vndstrategy,
	.d_ioctl = vndioctl,
	.d_dump = vnddump,
	.d_psize = vndsize,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};

const struct cdevsw vnd_cdevsw = {
	.d_open = vndopen,
	.d_close = vndclose,
	.d_read = vndread,
	.d_write = vndwrite,
	.d_ioctl = vndioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};

static int	vnd_match(device_t, cfdata_t, void *);
static void	vnd_attach(device_t, device_t, void *);
static int	vnd_detach(device_t, int);

CFATTACH_DECL3_NEW(vnd, sizeof(struct vnd_softc),
    vnd_match, vnd_attach, vnd_detach, NULL, NULL, NULL, DVF_DETACH_SHUTDOWN);

static struct vnd_softc	*vnd_spawn(int);
static int	vnd_destroy(device_t);

static const struct dkdriver vnddkdriver = {
	.d_strategy = vndstrategy,
	.d_minphys = minphys
};

void
vndattach(int num)
{
	int error;

	error = config_cfattach_attach(vnd_cd.cd_name, &vnd_ca);
	if (error)
		aprint_error("%s: unable to register cfattach, error = %d\n",
		    vnd_cd.cd_name, error);
}

static int
vnd_match(device_t self, cfdata_t cfdata, void *aux)
{

	return 1;
}

static void
vnd_attach(device_t parent, device_t self, void *aux)
{
	struct vnd_softc *sc = device_private(self);

	sc->sc_dev = self;
	sc->sc_comp_offsets = NULL;
	sc->sc_comp_buff = NULL;
	sc->sc_comp_decombuf = NULL;
	bufq_alloc(&sc->sc_tab, "disksort", BUFQ_SORT_RAWBLOCK);
	disk_init(&sc->sc_dkdev, device_xname(self), &vnddkdriver);
	if (!pmf_device_register(self, NULL, NULL))
		aprint_error_dev(self, "couldn't establish power handler\n");
}

static int
vnd_detach(device_t self, int flags)
{
	int error;
	struct vnd_softc *sc = device_private(self);

	if (sc->sc_flags & VNF_INITED) {
		error = vnddoclear(sc, 0, -1, (flags & DETACH_FORCE) != 0);
		if (error != 0)
			return error;
	}

	pmf_device_deregister(self);
	bufq_free(sc->sc_tab);
	disk_destroy(&sc->sc_dkdev);

	return 0;
}

static struct vnd_softc *
vnd_spawn(int unit)
{
	cfdata_t cf;

	cf = malloc(sizeof(*cf), M_DEVBUF, M_WAITOK);
	cf->cf_name = vnd_cd.cd_name;
	cf->cf_atname = vnd_cd.cd_name;
	cf->cf_unit = unit;
	cf->cf_fstate = FSTATE_STAR;

	return device_private(config_attach_pseudo(cf));
}

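/*
 * vnd_spawn() creates a pseudo-device instance on the fly when an
 * unconfigured unit is first opened (see vndopen() below); vnd_destroy()
 * undoes that once the unit has been closed and is no longer configured.
 */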
static int
vnd_destroy(device_t dev)
{
	int error;
	cfdata_t cf;

	cf = device_cfdata(dev);
	error = config_detach(dev, DETACH_QUIET);
	if (error)
		return error;
	free(cf, M_DEVBUF);
	return 0;
}

static int
vndopen(dev_t dev, int flags, int mode, struct lwp *l)
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;
	int error = 0, part, pmask;
	struct disklabel *lp;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndopen(0x%"PRIx64", 0x%x, 0x%x, %p)\n",
		    dev, flags, mode, l);
#endif
	sc = device_lookup_private(&vnd_cd, unit);
	if (sc == NULL) {
		sc = vnd_spawn(unit);
		if (sc == NULL)
			return ENOMEM;

		/* compatibility, keep disklabel after close */
		sc->sc_flags = VNF_KLABEL;
	}

	if ((error = vndlock(sc)) != 0)
		return error;

	mutex_enter(&sc->sc_dkdev.dk_openlock);

	if ((sc->sc_flags & VNF_CLEARING) != 0) {
		error = ENXIO;
		goto done;
	}

	lp = sc->sc_dkdev.dk_label;

	part = DISKPART(dev);
	pmask = (1 << part);

	if (sc->sc_dkdev.dk_nwedges != 0 && part != RAW_PART) {
		error = EBUSY;
		goto done;
	}

	if ((flags & FWRITE) && (sc->sc_flags & VNF_READONLY)) {
		error = EROFS;
		goto done;
	}

	if (sc->sc_flags & VNF_INITED) {
		if ((sc->sc_dkdev.dk_openmask & ~(1<<RAW_PART)) != 0) {
			/*
			 * If any non-raw partition is open, but the disk
			 * has been invalidated, disallow further opens.
			 */
			if ((sc->sc_flags & VNF_VLABEL) == 0) {
				error = EIO;
				goto done;
			}
		} else {
			/*
			 * Load the partition info if not already loaded.
			 */
			if ((sc->sc_flags & VNF_VLABEL) == 0) {
				sc->sc_flags |= VNF_VLABEL;
				vndgetdisklabel(dev, sc);
			}
		}
	}

	/* Check that the partition exists. */
	if (part != RAW_PART) {
		if (((sc->sc_flags & VNF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
		     (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			goto done;
		}
	}

	/* Prevent our unit from being unconfigured while open. */
	switch (mode) {
	case S_IFCHR:
		sc->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		sc->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}
	sc->sc_dkdev.dk_openmask =
	    sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask;

 done:
	mutex_exit(&sc->sc_dkdev.dk_openlock);
	vndunlock(sc);
	return error;
}

static int
vndclose(dev_t dev, int flags, int mode, struct lwp *l)
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;
	int error = 0, part;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndclose(0x%"PRIx64", 0x%x, 0x%x, %p)\n",
		    dev, flags, mode, l);
#endif
	sc = device_lookup_private(&vnd_cd, unit);
	if (sc == NULL)
		return ENXIO;

	if ((error = vndlock(sc)) != 0)
		return error;

	mutex_enter(&sc->sc_dkdev.dk_openlock);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (mode) {
	case S_IFCHR:
		sc->sc_dkdev.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		sc->sc_dkdev.dk_bopenmask &= ~(1 << part);
		break;
	}
	sc->sc_dkdev.dk_openmask =
	    sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask;

	/* are we the last opener? */
	if (sc->sc_dkdev.dk_openmask == 0) {
		if ((sc->sc_flags & VNF_KLABEL) == 0)
			sc->sc_flags &= ~VNF_VLABEL;
	}

	mutex_exit(&sc->sc_dkdev.dk_openlock);

	vndunlock(sc);

	if ((sc->sc_flags & VNF_INITED) == 0) {
		if ((error = vnd_destroy(sc->sc_dev)) != 0) {
			aprint_error_dev(sc->sc_dev,
			    "unable to detach instance\n");
			return error;
		}
	}

	return 0;
}

/*
 * Queue the request, and wake up the kernel thread to handle it.
 */
static void
vndstrategy(struct buf *bp)
{
	int unit = vndunit(bp->b_dev);
	struct vnd_softc *vnd =
	    device_lookup_private(&vnd_cd, unit);
	struct disklabel *lp;
	daddr_t blkno;
	int s = splbio();

	if (vnd == NULL) {
		bp->b_error = ENXIO;
		goto done;
	}
	lp = vnd->sc_dkdev.dk_label;

	if ((vnd->sc_flags & VNF_INITED) == 0) {
		bp->b_error = ENXIO;
		goto done;
	}

	/*
	 * The transfer must be a whole number of blocks.
	 */
	if ((bp->b_bcount % lp->d_secsize) != 0) {
		bp->b_error = EINVAL;
		goto done;
	}

	/*
	 * Check if we're read-only.
	 */
	if ((vnd->sc_flags & VNF_READONLY) && !(bp->b_flags & B_READ)) {
		bp->b_error = EACCES;
		goto done;
	}

	/* If it's a nil transfer, wake up the top half now. */
	if (bp->b_bcount == 0) {
		goto done;
	}

	/*
	 * Do bounds checking and adjust the transfer.  If there's an error,
	 * the bounds check will flag that for us.
	 */
	if (DISKPART(bp->b_dev) == RAW_PART) {
		if (bounds_check_with_mediasize(bp, DEV_BSIZE,
		    vnd->sc_size) <= 0)
			goto done;
	} else {
		if (bounds_check_with_label(&vnd->sc_dkdev,
		    bp, vnd->sc_flags & (VNF_WLABEL|VNF_LABELLING)) <= 0)
			goto done;
	}

	/*
	 * Put the block number in terms of the logical blocksize
	 * of the "device".
	 */

	blkno = bp->b_blkno / (lp->d_secsize / DEV_BSIZE);

	/*
	 * Translate the partition-relative block number to an absolute one.
	 */
	if (DISKPART(bp->b_dev) != RAW_PART) {
		struct partition *pp;

		pp = &vnd->sc_dkdev.dk_label->d_partitions[
		    DISKPART(bp->b_dev)];
		blkno += pp->p_offset;
	}
	bp->b_rawblkno = blkno;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndstrategy(%p): unit %d\n", bp, unit);
#endif
	if ((vnd->sc_flags & VNF_USE_VN_RDWR)) {
		KASSERT(vnd->sc_pending >= 0 &&
		    vnd->sc_pending <= VND_MAXPENDING(vnd));
		while (vnd->sc_pending == VND_MAXPENDING(vnd))
			tsleep(&vnd->sc_pending, PRIBIO, "vndpc", 0);
		vnd->sc_pending++;
	}
	bufq_put(vnd->sc_tab, bp);
	wakeup(&vnd->sc_tab);
	splx(s);
	return;

 done:
	bp->b_resid = bp->b_bcount;
	biodone(bp);
	splx(s);
}

static bool
vnode_has_strategy(struct vnd_softc *vnd)
{
	return vnode_has_op(vnd->sc_vp, VOFFSET(vop_bmap)) &&
	    vnode_has_op(vnd->sc_vp, VOFFSET(vop_strategy));
}

/*
 * Verify that I/O requests cannot be smaller than the
 * smallest I/O size supported by the backend.
 */
static bool
vnode_has_large_blocks(struct vnd_softc *vnd)
{
	u_int32_t vnd_secsize, iosize;

	iosize = vnd->sc_iosize;
	vnd_secsize = vnd->sc_geom.vng_secsize;

	return vnd_secsize % iosize != 0;
}

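/*
 * Worked example (illustrative): a vnd configured with 512-byte sectors
 * on top of a backing store whose smallest supported I/O size is 4096
 * bytes gives 512 % 4096 == 512 != 0, so vnode_has_large_blocks()
 * returns true and the caller falls back to VOP_READ/VOP_WRITE.
 */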
/* XXX this function needs a reliable check to detect
 * sparse files.  Otherwise, bmap/strategy may be used
 * and fail on non-allocated blocks.  VOP_READ/VOP_WRITE
 * works on sparse files.
 */
#if notyet
static bool
vnode_strategy_probe(struct vnd_softc *vnd)
{
	int error;
	daddr_t nbn;

	if (!vnode_has_strategy(vnd))
		return false;

	if (vnode_has_large_blocks(vnd))
		return false;

	/* Convert the first logical block number to its
	 * physical block number.
	 */
	error = 0;
	vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY);
	error = VOP_BMAP(vnd->sc_vp, 0, NULL, &nbn, NULL);
	VOP_UNLOCK(vnd->sc_vp);

	/* Test if that worked. */
	if (error == 0 && (long)nbn == -1)
		return false;

	return true;
}
#endif

static void
vndthread(void *arg)
{
	struct vnd_softc *vnd = arg;
	int s;

	/* Determine whether we can *use* VOP_BMAP and VOP_STRATEGY to
	 * directly access the backing vnode.  If we can, use these two
	 * operations to avoid messing with the local buffer cache.
	 * Otherwise fall back to regular VOP_READ/VOP_WRITE operations
	 * which are guaranteed to work with any file system. */
	if ((vnd->sc_flags & VNF_USE_VN_RDWR) == 0 &&
	    !vnode_has_strategy(vnd))
		vnd->sc_flags |= VNF_USE_VN_RDWR;

	/* VOP_STRATEGY can only be used if the backing vnode allows
	 * access to blocks as small as defined by the vnd geometry.
	 */
	if ((vnd->sc_flags & VNF_USE_VN_RDWR) == 0 &&
	    vnode_has_large_blocks(vnd))
		vnd->sc_flags |= VNF_USE_VN_RDWR;

#ifdef DEBUG
	if (vnddebug & VDB_INIT)
		printf("vndthread: vp %p, %s\n", vnd->sc_vp,
		    (vnd->sc_flags & VNF_USE_VN_RDWR) == 0 ?
		    "using bmap/strategy operations" :
		    "using read/write operations");
#endif

	s = splbio();
	vnd->sc_flags |= VNF_KTHREAD;
	wakeup(&vnd->sc_kthread);

	/*
	 * Dequeue requests and serve them depending on the available
	 * vnode operations.
	 */
	while ((vnd->sc_flags & VNF_VUNCONF) == 0) {
		struct vndxfer *vnx;
		struct buf *obp;
		struct buf *bp;

		obp = bufq_get(vnd->sc_tab);
		if (obp == NULL) {
			tsleep(&vnd->sc_tab, PRIBIO, "vndbp", 0);
			continue;
		}
		if ((vnd->sc_flags & VNF_USE_VN_RDWR)) {
			KASSERT(vnd->sc_pending > 0 &&
			    vnd->sc_pending <= VND_MAXPENDING(vnd));
			if (vnd->sc_pending-- == VND_MAXPENDING(vnd))
				wakeup(&vnd->sc_pending);
		}
		splx(s);
#ifdef DEBUG
		if (vnddebug & VDB_FOLLOW)
			printf("vndthread(%p)\n", obp);
#endif

		if (vnd->sc_vp->v_mount == NULL) {
			obp->b_error = ENXIO;
			goto done;
		}
#ifdef VND_COMPRESSION
		/* handle a compressed read */
		if ((obp->b_flags & B_READ) != 0 &&
		    (vnd->sc_flags & VNF_COMP)) {
			off_t bn;

			/* Convert to a byte offset within the file. */
			bn = obp->b_rawblkno *
			    vnd->sc_dkdev.dk_label->d_secsize;

			compstrategy(obp, bn);
			goto done;
		}
#endif /* VND_COMPRESSION */

		/*
		 * Allocate a header for this transfer and link it to the
		 * buffer.
		 */
		s = splbio();
		vnx = VND_GETXFER(vnd);
		splx(s);
		vnx->vx_vnd = vnd;

		s = splbio();
		while (vnd->sc_active >= vnd->sc_maxactive) {
			tsleep(&vnd->sc_tab, PRIBIO, "vndac", 0);
		}
		vnd->sc_active++;
		splx(s);

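		/*
		 * At most sc_maxactive transfers are now in flight;
		 * vndiodone() wakes &vnd->sc_tab again once the active
		 * count drains back to zero.
		 */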
		/* Instrumentation. */
		disk_busy(&vnd->sc_dkdev);

		bp = &vnx->vx_buf;
		buf_init(bp);
		bp->b_flags = (obp->b_flags & (B_READ | B_PHYS | B_RAW));
		bp->b_oflags = obp->b_oflags;
		bp->b_cflags = obp->b_cflags;
		bp->b_iodone = vndiodone;
		bp->b_private = obp;
		bp->b_vp = vnd->sc_vp;
		bp->b_objlock = bp->b_vp->v_interlock;
		bp->b_data = obp->b_data;
		bp->b_bcount = obp->b_bcount;
		BIO_COPYPRIO(bp, obp);

		/* Make sure the request succeeds while suspending this fs. */
		fstrans_start_lazy(vnd->sc_vp->v_mount);

		/* Handle the request using the appropriate operations. */
		if ((vnd->sc_flags & VNF_USE_VN_RDWR) == 0)
			handle_with_strategy(vnd, obp, bp);
		else
			handle_with_rdwr(vnd, obp, bp);

		fstrans_done(vnd->sc_vp->v_mount);

		s = splbio();
		continue;

 done:
		biodone(obp);
		s = splbio();
	}

	vnd->sc_flags &= ~(VNF_KTHREAD | VNF_VUNCONF);
	wakeup(&vnd->sc_kthread);
	splx(s);
	kthread_exit(0);
}

/*
 * Checks if the given vnode supports the requested operation.
 * The operation is specified by the offset returned by VOFFSET.
 *
 * XXX The test below used to determine this is quite fragile
 * because it relies on the file system to use genfs to specify
 * unimplemented operations.  There might be another way to do
 * it more cleanly.
 */
static bool
vnode_has_op(const struct vnode *vp, int opoffset)
{
	int (*defaultp)(void *);
	int (*opp)(void *);

	defaultp = vp->v_op[VOFFSET(vop_default)];
	opp = vp->v_op[opoffset];

	return opp != defaultp && opp != genfs_eopnotsupp &&
	    opp != genfs_badop && opp != genfs_nullop;
}

/*
 * Handles the read/write request given in 'bp' using the vnode's VOP_READ
 * and VOP_WRITE operations.
 *
 * 'obp' is a pointer to the original request fed to the vnd device.
 */
static void
handle_with_rdwr(struct vnd_softc *vnd, const struct buf *obp, struct buf *bp)
{
	bool doread;
	off_t offset;
	size_t len, resid;
	struct vnode *vp;
	int npages;

	doread = bp->b_flags & B_READ;
	offset = obp->b_rawblkno * vnd->sc_dkdev.dk_label->d_secsize;
	len = bp->b_bcount;
	vp = vnd->sc_vp;

#if defined(DEBUG)
	if (vnddebug & VDB_IO)
		printf("vnd (rdwr): vp %p, %s, rawblkno 0x%" PRIx64
		    ", secsize %d, offset %" PRIu64
		    ", bcount %d\n",
		    vp, doread ? "read" : "write", obp->b_rawblkno,
		    vnd->sc_dkdev.dk_label->d_secsize, offset,
		    bp->b_bcount);
#endif

	/* Issue the read or write operation. */
	bp->b_error =
	    vn_rdwr(doread ? UIO_READ : UIO_WRITE,
		vp, bp->b_data, len, offset, UIO_SYSSPACE,
		IO_ADV_ENCODE(POSIX_FADV_NOREUSE) | IO_DIRECT,
		vnd->sc_cred, &resid, NULL);
	bp->b_resid = resid;

	/*
	 * Avoid caching too many pages, the vnd user
	 * is usually a filesystem and caches itself.
	 * We need some amount of caching to not hinder
	 * read-ahead and write-behind operations.
	 */
	npages = atomic_load_relaxed(&vp->v_uobj.uo_npages);
	if (npages > VND_MAXPAGES(vnd)) {
		rw_enter(vp->v_uobj.vmobjlock, RW_WRITER);
		(void) VOP_PUTPAGES(vp, 0, 0,
		    PGO_ALLPAGES | PGO_CLEANIT | PGO_FREE);
	}

	/* We need to increase the number of outputs on the vnode if
	 * there was any write to it.
	 */
	if (!doread) {
		mutex_enter(vp->v_interlock);
		vp->v_numoutput++;
		mutex_exit(vp->v_interlock);
	}

	biodone(bp);
}

/*
 * Handles the read/write request given in 'bp' using the vnode's VOP_BMAP
 * and VOP_STRATEGY operations.
 *
 * 'obp' is a pointer to the original request fed to the vnd device.
 */
static void
handle_with_strategy(struct vnd_softc *vnd, const struct buf *obp,
    struct buf *bp)
{
	int bsize, error, flags, skipped;
	size_t resid, sz;
	off_t bn, offset;
	struct vnode *vp;
	struct buf *nbp = NULL;

	flags = obp->b_flags;


	/* Convert to a byte offset within the file. */
	bn = obp->b_rawblkno * vnd->sc_dkdev.dk_label->d_secsize;

	bsize = vnd->sc_vp->v_mount->mnt_stat.f_iosize;
	skipped = 0;

	/*
	 * Break the request into bsize pieces and feed them
	 * sequentially using VOP_BMAP/VOP_STRATEGY.
	 * We do it this way to keep from flooding NFS servers if we
	 * are connected to an NFS file.  This places the burden on
	 * the client rather than the server.
	 */
	error = 0;
	bp->b_resid = bp->b_bcount;
	for (offset = 0, resid = bp->b_resid; /* true */;
	    resid -= sz, offset += sz) {
		daddr_t nbn;
		int off, nra;

		nra = 0;
		vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY);
		error = VOP_BMAP(vnd->sc_vp, bn / bsize, &vp, &nbn, &nra);
		VOP_UNLOCK(vnd->sc_vp);

		if (error == 0 && (long)nbn == -1)
			error = EIO;

		/*
		 * If there was an error or a hole in the file...punt.
		 * Note that we may have to wait for any operations
		 * that we have already fired off before releasing
		 * the buffer.
		 *
		 * XXX we could deal with holes here but it would be
		 * a hassle (in the write case).
		 */
		if (error) {
			skipped += resid;
			break;
		}

#ifdef DEBUG
		if (!dovndcluster)
			nra = 0;
#endif

		off = bn % bsize;
		sz = MIN(((off_t)1 + nra) * bsize - off, resid);
#ifdef DEBUG
		if (vnddebug & VDB_IO)
			printf("vndstrategy: vp %p/%p bn 0x%qx/0x%" PRIx64
			    " sz 0x%zx\n", vnd->sc_vp, vp, (long long)bn,
			    nbn, sz);
#endif

		nbp = getiobuf(vp, true);
		nestiobuf_setup(bp, nbp, offset, sz);
		nbp->b_blkno = nbn + btodb(off);

#if 0 /* XXX #ifdef DEBUG */
		if (vnddebug & VDB_IO)
			printf("vndstart(%ld): bp %p vp %p blkno "
			    "0x%" PRIx64 " flags %x addr %p cnt 0x%x\n",
			    (long) (vnd-vnd_softc), &nbp->vb_buf,
			    nbp->vb_buf.b_vp, nbp->vb_buf.b_blkno,
			    nbp->vb_buf.b_flags, nbp->vb_buf.b_data,
			    nbp->vb_buf.b_bcount);
#endif
		if (resid == sz) {
			break;
		}
		VOP_STRATEGY(vp, nbp);
		bn += sz;
	}
	if (!(flags & B_READ)) {
		struct vnode *w_vp;
		/*
		 * This is the last nested buf, account for
		 * the parent buf write too.
		 * This has to be done last, so that
		 * fsync won't wait for this write which
		 * has no chance to complete before all nested bufs
		 * have been queued.  But it has to be done
		 * before the last VOP_STRATEGY()
		 * or the call to nestiobuf_done().
		 */
		w_vp = bp->b_vp;
		mutex_enter(w_vp->v_interlock);
		w_vp->v_numoutput++;
		mutex_exit(w_vp->v_interlock);
	}
	KASSERT(skipped != 0 || nbp != NULL);
	if (skipped)
		nestiobuf_done(bp, skipped, error);
	else
		VOP_STRATEGY(vp, nbp);
}

static void
vndiodone(struct buf *bp)
{
	struct vndxfer *vnx = VND_BUFTOXFER(bp);
	struct vnd_softc *vnd = vnx->vx_vnd;
	struct buf *obp = bp->b_private;
	int s = splbio();

	KERNEL_LOCK(1, NULL);		/* XXXSMP */
	KASSERT(&vnx->vx_buf == bp);
	KASSERT(vnd->sc_active > 0);
#ifdef DEBUG
	if (vnddebug & VDB_IO) {
		printf("vndiodone1: bp %p iodone: error %d\n",
		    bp, bp->b_error);
	}
#endif
	disk_unbusy(&vnd->sc_dkdev, bp->b_bcount - bp->b_resid,
	    (bp->b_flags & B_READ));
	vnd->sc_active--;
	if (vnd->sc_active == 0) {
		wakeup(&vnd->sc_tab);
	}
	KERNEL_UNLOCK_ONE(NULL);	/* XXXSMP */
	splx(s);
	obp->b_error = bp->b_error;
	obp->b_resid = bp->b_resid;
	buf_destroy(bp);
	VND_PUTXFER(vnd, vnx);
	biodone(obp);
}

/* ARGSUSED */
static int
vndread(dev_t dev, struct uio *uio, int flags)
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndread(0x%"PRIx64", %p)\n", dev, uio);
#endif

	sc = device_lookup_private(&vnd_cd, unit);
	if (sc == NULL)
		return ENXIO;

	if ((sc->sc_flags & VNF_INITED) == 0)
		return ENXIO;

	return physio(vndstrategy, NULL, dev, B_READ, minphys, uio);
}

/* ARGSUSED */
static int
vndwrite(dev_t dev, struct uio *uio, int flags)
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndwrite(0x%"PRIx64", %p)\n", dev, uio);
#endif

	sc = device_lookup_private(&vnd_cd, unit);
	if (sc == NULL)
		return ENXIO;

	if ((sc->sc_flags & VNF_INITED) == 0)
		return ENXIO;

	return physio(vndstrategy, NULL, dev, B_WRITE, minphys, uio);
}

static int
vnd_cget(struct lwp *l, int unit, int *un, struct vattr *va)
{
	int error;
	struct vnd_softc *vnd;

	if (*un == -1)
		*un = unit;
	if (*un < 0)
		return EINVAL;

	vnd = device_lookup_private(&vnd_cd, *un);
	if (vnd == NULL)
		return -1;

	if ((vnd->sc_flags & VNF_INITED) == 0)
		return -1;

	vn_lock(vnd->sc_vp, LK_SHARED | LK_RETRY);
	error = VOP_GETATTR(vnd->sc_vp, va, l->l_cred);
	VOP_UNLOCK(vnd->sc_vp);
	return error;
}

static int
vnddoclear(struct vnd_softc *vnd, int pmask, int minor, bool force)
{
	int error;

	if ((error = vndlock(vnd)) != 0)
		return error;

	/*
	 * Don't unconfigure if any other partitions are open
	 * or if both the character and block flavors of this
	 * partition are open.
	 */
	if (DK_BUSY(vnd, pmask) && !force) {
		vndunlock(vnd);
		return EBUSY;
	}

	/* Delete all of our wedges */
	dkwedge_delall(&vnd->sc_dkdev);

	/*
	 * XXX vndclear() might call vndclose() implicitly;
	 * release lock to avoid recursion
	 *
	 * Set VNF_CLEARING to prevent vndopen() from
	 * sneaking in after we vndunlock().
	 */
	vnd->sc_flags |= VNF_CLEARING;
	vndunlock(vnd);
	vndclear(vnd, minor);
#ifdef DEBUG
	if (vnddebug & VDB_INIT)
		printf("%s: CLRed\n", __func__);
#endif

	/* Destroy the xfer and buffer pools. */
	pool_destroy(&vnd->sc_vxpool);

	/* Detach the disk. */
	disk_detach(&vnd->sc_dkdev);

	return 0;
}

static int
vndioctl_get(struct lwp *l, void *data, int unit, struct vattr *va)
{
	int error;

	KASSERT(l);

	/* the first member is always int vnd_unit in all the versions */
	if (*(int *)data >= vnd_cd.cd_ndevs)
		return ENXIO;

	switch (error = vnd_cget(l, unit, (int *)data, va)) {
	case -1:
		/* unused is not an error */
		memset(va, 0, sizeof(*va));
		/*FALLTHROUGH*/
	case 0:
		return 0;
	default:
		return error;
	}
}

/* ARGSUSED */
static int
vndioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
{
	bool force;
	int unit = vndunit(dev);
	struct vnd_softc *vnd;
	struct vnd_ioctl *vio;
	struct vattr vattr;
	struct pathbuf *pb;
	struct vnode *vp;
	int error, part, pmask;
	uint64_t geomsize;
	int fflags;
#ifdef __HAVE_OLD_DISKLABEL
	struct disklabel newlabel;
#endif

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndioctl(0x%"PRIx64", 0x%lx, %p, 0x%x, %p): unit %d\n",
		    dev, cmd, data, flag, l->l_proc, unit);
#endif
	/* Do the gets first; they don't need initialization or verification */
	switch (cmd) {
	case VNDIOCGET:
		if ((error = vndioctl_get(l, data, unit, &vattr)) != 0)
			return error;

		struct vnd_user *vnu = data;
		vnu->vnu_dev = vattr.va_fsid;
		vnu->vnu_ino = vattr.va_fileid;
		return 0;

	default:
		/* First check for COMPAT_50 hook */
		MODULE_HOOK_CALL(compat_vndioctl_50_hook,
		    (cmd, l, data, unit, &vattr, vndioctl_get),
		    enosys(), error);

		/*
		 * If not present, then the COMPAT_30 hook is also not
		 * present, so just continue with checks for the
		 * "write" commands
		 */
		if (error == ENOSYS) {
			error = 0;
			break;
		}

		/* If not already handled, try the COMPAT_30 hook */
		if (error == EPASSTHROUGH)
			MODULE_HOOK_CALL(compat_vndioctl_30_hook,
			    (cmd, l, data, unit, &vattr, vndioctl_get),
			    enosys(), error);

		/* If no COMPAT_30 module, or not handled, check writes */
		if (error == ENOSYS || error == EPASSTHROUGH) {
			error = 0;
			break;
		}
		return error;
	}

	vnd = device_lookup_private(&vnd_cd, unit);
	if (vnd == NULL)
		return ENXIO;
	vio = (struct vnd_ioctl *)data;

	/* Must be open for writes for these commands... */
	switch (cmd) {
	case VNDIOCSET50:
	case VNDIOCCLR50:
		if (!compat_vndioctl_50_hook.hooked)
			return EINVAL;
		/* FALLTHROUGH */
	case VNDIOCSET:
	case VNDIOCCLR:
	case DIOCSDINFO:
	case DIOCWDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCSDINFO:
	case ODIOCWDINFO:
#endif
	case DIOCKLABEL:
	case DIOCWLABEL:
	case DIOCCACHESYNC:
		if ((flag & FWRITE) == 0)
			return EBADF;
	}

	/* Must be initialized for these... */
	switch (cmd) {
	case VNDIOCCLR:
	case VNDIOCCLR50:
	case DIOCGDINFO:
	case DIOCSDINFO:
	case DIOCWDINFO:
	case DIOCGPARTINFO:
	case DIOCKLABEL:
	case DIOCWLABEL:
	case DIOCGDEFLABEL:
	case DIOCGCACHE:
	case DIOCGSTRATEGY:
	case DIOCCACHESYNC:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDINFO:
	case ODIOCSDINFO:
	case ODIOCWDINFO:
	case ODIOCGDEFLABEL:
#endif
		if ((vnd->sc_flags & VNF_INITED) == 0)
			return ENXIO;
	}

	error = disk_ioctl(&vnd->sc_dkdev, dev, cmd, data, flag, l);
	if (error != EPASSTHROUGH)
		return error;


	switch (cmd) {
	case VNDIOCSET50:
	case VNDIOCSET:
		if (vnd->sc_flags & VNF_INITED)
			return EBUSY;

		if ((error = vndlock(vnd)) != 0)
			return error;

		fflags = FREAD;
		if ((vio->vnd_flags & VNDIOF_READONLY) == 0)
			fflags |= FWRITE;
		if ((vio->vnd_flags & VNDIOF_FILEIO) != 0)
			vnd->sc_flags |= VNF_USE_VN_RDWR;
		error = pathbuf_copyin(vio->vnd_file, &pb);
		if (error) {
			goto unlock_and_exit;
		}
		error = vn_open(NULL, pb, 0, fflags, 0, &vp, NULL, NULL);
		if (error != 0) {
			pathbuf_destroy(pb);
			goto unlock_and_exit;
		}
		KASSERT(l);
		error = VOP_GETATTR(vp, &vattr, l->l_cred);
		if (!error && vp->v_type != VREG)
			error = EOPNOTSUPP;
		if (!error && vattr.va_bytes < vattr.va_size)
			/* File is definitely sparse, use vn_rdwr() */
			vnd->sc_flags |= VNF_USE_VN_RDWR;
		if (error) {
			VOP_UNLOCK(vp);
			goto close_and_exit;
		}

		/* If using a compressed file, initialize its info */
		/* (or abort with an error if kernel has no compression) */
		if (vio->vnd_flags & VNDIOF_COMP) {
#ifdef VND_COMPRESSION
			struct vnd_comp_header *ch;
			int i;
			uint32_t comp_size;
			uint32_t comp_maxsize;

			/* allocate space for compressed file header */
			ch = malloc(sizeof(struct vnd_comp_header),
			    M_TEMP, M_WAITOK);

			/* read compressed file header */
			error = vn_rdwr(UIO_READ, vp, (void *)ch,
			    sizeof(struct vnd_comp_header), 0, UIO_SYSSPACE,
			    IO_UNIT|IO_NODELOCKED, l->l_cred, NULL, NULL);
			if (error) {
				free(ch, M_TEMP);
				VOP_UNLOCK(vp);
				goto close_and_exit;
			}

			/* reject an obviously invalid header */
			if (be32toh(ch->block_size) == 0 ||
			    be32toh(ch->num_blocks) > UINT32_MAX - 1) {
				free(ch, M_TEMP);
				VOP_UNLOCK(vp);
				error = EINVAL;
				goto close_and_exit;
			}

			/* save some header info */
			vnd->sc_comp_blksz = be32toh(ch->block_size);
			/* note last offset is the file byte size */
			vnd->sc_comp_numoffs = be32toh(ch->num_blocks) + 1;
			free(ch, M_TEMP);
			if (!DK_DEV_BSIZE_OK(vnd->sc_comp_blksz)) {
				VOP_UNLOCK(vp);
				error = EINVAL;
				goto close_and_exit;
			}
			KASSERT(0 < vnd->sc_comp_blksz);
			KASSERT(0 < vnd->sc_comp_numoffs);
			/*
			 * @#^@!$& gcc -Wtype-limits refuses to let me
			 * write SIZE_MAX/sizeof(uint64_t) < numoffs,
			 * because the range of the type on amd64 makes
			 * the comparisons always false.
			 */
#if SIZE_MAX <= UINT32_MAX*(64/CHAR_BIT)
			if (SIZE_MAX/sizeof(uint64_t) <
			    vnd->sc_comp_numoffs) {
				VOP_UNLOCK(vp);
				error = EINVAL;
				goto close_and_exit;
			}
#endif
			if ((vattr.va_size < sizeof(struct vnd_comp_header)) ||
			    (vattr.va_size - sizeof(struct vnd_comp_header) <
				sizeof(uint64_t)*vnd->sc_comp_numoffs) ||
			    (UQUAD_MAX/vnd->sc_comp_blksz <
				vnd->sc_comp_numoffs - 1)) {
				VOP_UNLOCK(vp);
				error = EINVAL;
				goto close_and_exit;
			}

			/* set decompressed file size */
			KASSERT(vnd->sc_comp_numoffs - 1 <=
			    UQUAD_MAX/vnd->sc_comp_blksz);
			vattr.va_size =
			    ((u_quad_t)vnd->sc_comp_numoffs - 1) *
			    (u_quad_t)vnd->sc_comp_blksz;

			/* allocate space for all the compressed offsets */
			__CTASSERT(UINT32_MAX <= UQUAD_MAX/sizeof(uint64_t));
			vnd->sc_comp_offsets =
			    malloc(sizeof(uint64_t) * vnd->sc_comp_numoffs,
				M_DEVBUF, M_WAITOK);

			/* read in the offsets */
			error = vn_rdwr(UIO_READ, vp,
			    (void *)vnd->sc_comp_offsets,
			    sizeof(uint64_t) * vnd->sc_comp_numoffs,
			    sizeof(struct vnd_comp_header), UIO_SYSSPACE,
			    IO_UNIT|IO_NODELOCKED, l->l_cred, NULL, NULL);
			if (error) {
				VOP_UNLOCK(vp);
				goto close_and_exit;
			}
			/*
			 * Find the largest block size (used for the
			 * allocation limit).  Also convert the offsets
			 * to native byte order.
			 */
			comp_maxsize = 0;
			for (i = 0; i < vnd->sc_comp_numoffs - 1; i++) {
				vnd->sc_comp_offsets[i] =
				    be64toh(vnd->sc_comp_offsets[i]);
				comp_size =
				    be64toh(vnd->sc_comp_offsets[i + 1])
				    - vnd->sc_comp_offsets[i];
				if (comp_size > comp_maxsize)
					comp_maxsize = comp_size;
			}
			vnd->sc_comp_offsets[vnd->sc_comp_numoffs - 1] =
			    be64toh(vnd->sc_comp_offsets[vnd->sc_comp_numoffs
				- 1]);

			/* create compressed data buffer */
			vnd->sc_comp_buff = malloc(comp_maxsize,
			    M_DEVBUF, M_WAITOK);

			/* create decompressed buffer */
			vnd->sc_comp_decombuf = malloc(vnd->sc_comp_blksz,
			    M_DEVBUF, M_WAITOK);
			vnd->sc_comp_buffblk = -1;

			/* Initialize decompress stream */
			memset(&vnd->sc_comp_stream, 0, sizeof(z_stream));
			vnd->sc_comp_stream.zalloc = vnd_alloc;
			vnd->sc_comp_stream.zfree = vnd_free;
			error = inflateInit2(&vnd->sc_comp_stream, MAX_WBITS);
			if (error) {
				if (vnd->sc_comp_stream.msg)
					printf("vnd%d: compressed file, %s\n",
					    unit, vnd->sc_comp_stream.msg);
				VOP_UNLOCK(vp);
				error = EINVAL;
				goto close_and_exit;
			}

			vnd->sc_flags |= VNF_COMP | VNF_READONLY;
#else /* !VND_COMPRESSION */
			VOP_UNLOCK(vp);
			error = EOPNOTSUPP;
			goto close_and_exit;
#endif /* VND_COMPRESSION */
		}

		VOP_UNLOCK(vp);
		vnd->sc_vp = vp;
		vnd->sc_size = btodb(vattr.va_size);	/* note truncation */

		/*
		 * Get the smallest I/O size for the underlying device;
		 * fall back to the fundamental I/O size of the underlying
		 * file system.
		 */
		error = bdev_ioctl(vattr.va_fsid, DIOCGSECTORSIZE,
		    &vnd->sc_iosize, FKIOCTL, l);
		if (error)
			vnd->sc_iosize = vnd->sc_vp->v_mount->mnt_stat.f_frsize;

		/* Default the I/O size to DEV_BSIZE */
		if (vnd->sc_iosize == 0)
			vnd->sc_iosize = DEV_BSIZE;

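		/*
		 * Illustrative example: with the fictitious geometry
		 * chosen below, a 1 GiB image is 2097152 DEV_BSIZE
		 * blocks, which yields 2097152 / (32 * 64) = 1024
		 * cylinders.
		 */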
		/*
		 * Use the pseudo-geometry specified.  If none was provided,
		 * use the "standard" Adaptec fictitious geometry.
		 */
		if (vio->vnd_flags & VNDIOF_HASGEOM) {

			memcpy(&vnd->sc_geom, &vio->vnd_geom,
			    sizeof(vio->vnd_geom));

			/*
			 * Sanity-check the sector size.
			 */
			if (!DK_DEV_BSIZE_OK(vnd->sc_geom.vng_secsize) ||
			    vnd->sc_geom.vng_ntracks == 0 ||
			    vnd->sc_geom.vng_nsectors == 0) {
				error = EINVAL;
				goto close_and_exit;
			}

			/*
			 * Compute the missing cylinder count from the size.
			 */
			if (vnd->sc_geom.vng_ncylinders == 0)
				vnd->sc_geom.vng_ncylinders = vnd->sc_size / (
				    (vnd->sc_geom.vng_secsize / DEV_BSIZE) *
				    vnd->sc_geom.vng_ntracks *
				    vnd->sc_geom.vng_nsectors);

			/*
			 * Compute the size (in DEV_BSIZE blocks) specified
			 * by the geometry.
			 */
			geomsize = (int64_t)vnd->sc_geom.vng_nsectors *
			    vnd->sc_geom.vng_ntracks *
			    vnd->sc_geom.vng_ncylinders *
			    (vnd->sc_geom.vng_secsize / DEV_BSIZE);

			/*
			 * Sanity-check the size against the specified
			 * geometry.
			 */
			if (vnd->sc_size < geomsize) {
				error = EINVAL;
				goto close_and_exit;
			}
		} else if (vnd->sc_size >= (32 * 64)) {
			/*
			 * Size must be at least 2048 DEV_BSIZE blocks
			 * (1M) in order to use this geometry.
			 */
			vnd->sc_geom.vng_secsize = DEV_BSIZE;
			vnd->sc_geom.vng_nsectors = 32;
			vnd->sc_geom.vng_ntracks = 64;
			vnd->sc_geom.vng_ncylinders = vnd->sc_size / (64 * 32);
		} else {
			vnd->sc_geom.vng_secsize = DEV_BSIZE;
			vnd->sc_geom.vng_nsectors = 1;
			vnd->sc_geom.vng_ntracks = 1;
			vnd->sc_geom.vng_ncylinders = vnd->sc_size;
		}

		vnd_set_geometry(vnd);

		if (vio->vnd_flags & VNDIOF_READONLY) {
			vnd->sc_flags |= VNF_READONLY;
		}

		if ((error = vndsetcred(vnd, l->l_cred)) != 0)
			goto close_and_exit;

		vndthrottle(vnd, vnd->sc_vp);
		vio->vnd_osize = dbtob(vnd->sc_size);
		if (cmd != VNDIOCSET50)
			vio->vnd_size = dbtob(vnd->sc_size);
		vnd->sc_flags |= VNF_INITED;

		/* create the kernel thread, wait for it to be up */
		error = kthread_create(PRI_NONE, 0, NULL, vndthread, vnd,
		    &vnd->sc_kthread, "%s", device_xname(vnd->sc_dev));
		if (error)
			goto close_and_exit;
		while ((vnd->sc_flags & VNF_KTHREAD) == 0) {
			tsleep(&vnd->sc_kthread, PRIBIO, "vndthr", 0);
		}
#ifdef DEBUG
		if (vnddebug & VDB_INIT)
			printf("vndioctl: SET vp %p size 0x%lx %d/%d/%d/%d\n",
			    vnd->sc_vp, (unsigned long) vnd->sc_size,
			    vnd->sc_geom.vng_secsize,
			    vnd->sc_geom.vng_nsectors,
			    vnd->sc_geom.vng_ntracks,
			    vnd->sc_geom.vng_ncylinders);
#endif

		/* Attach the disk. */
		disk_attach(&vnd->sc_dkdev);

		/* Initialize the xfer and buffer pools. */
		pool_init(&vnd->sc_vxpool, sizeof(struct vndxfer), 0,
		    0, 0, "vndxpl", NULL, IPL_BIO);

		vndunlock(vnd);

		pathbuf_destroy(pb);

		/* Discover wedges on this disk */
		dkwedge_discover(&vnd->sc_dkdev);

		break;

 close_and_exit:
		(void) vn_close(vp, fflags, l->l_cred);
		pathbuf_destroy(pb);
 unlock_and_exit:
#ifdef VND_COMPRESSION
		/* free any allocated memory (for compressed file) */
		if (vnd->sc_comp_offsets) {
			free(vnd->sc_comp_offsets, M_DEVBUF);
			vnd->sc_comp_offsets = NULL;
		}
		if (vnd->sc_comp_buff) {
			free(vnd->sc_comp_buff, M_DEVBUF);
			vnd->sc_comp_buff = NULL;
		}
		if (vnd->sc_comp_decombuf) {
			free(vnd->sc_comp_decombuf, M_DEVBUF);
			vnd->sc_comp_decombuf = NULL;
		}
#endif /* VND_COMPRESSION */
		vndunlock(vnd);
		return error;

	case VNDIOCCLR50:
	case VNDIOCCLR:
		part = DISKPART(dev);
		pmask = (1 << part);
		force = (vio->vnd_flags & VNDIOF_FORCE) != 0;

		if ((error = vnddoclear(vnd, pmask, minor(dev), force)) != 0)
			return error;

		break;


	case DIOCWDINFO:
	case DIOCSDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCWDINFO:
	case ODIOCSDINFO:
#endif
	{
		struct disklabel *lp;

		if ((error = vndlock(vnd)) != 0)
			return error;

		vnd->sc_flags |= VNF_LABELLING;

#ifdef __HAVE_OLD_DISKLABEL
		if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
			memset(&newlabel, 0, sizeof newlabel);
			memcpy(&newlabel, data, sizeof (struct olddisklabel));
			lp = &newlabel;
		} else
#endif
		lp = (struct disklabel *)data;

		error = setdisklabel(vnd->sc_dkdev.dk_label,
		    lp, 0, vnd->sc_dkdev.dk_cpulabel);
		if (error == 0) {
			if (cmd == DIOCWDINFO
#ifdef __HAVE_OLD_DISKLABEL
			    || cmd == ODIOCWDINFO
#endif
			   )
				error = writedisklabel(VNDLABELDEV(dev),
				    vndstrategy, vnd->sc_dkdev.dk_label,
				    vnd->sc_dkdev.dk_cpulabel);
		}

		vnd->sc_flags &= ~VNF_LABELLING;

		vndunlock(vnd);

		if (error)
			return error;
		break;
	}

	case DIOCKLABEL:
		if (*(int *)data != 0)
			vnd->sc_flags |= VNF_KLABEL;
		else
			vnd->sc_flags &= ~VNF_KLABEL;
		break;

	case DIOCWLABEL:
		if (*(int *)data != 0)
			vnd->sc_flags |= VNF_WLABEL;
		else
			vnd->sc_flags &= ~VNF_WLABEL;
		break;

	case DIOCGDEFLABEL:
		vndgetdefaultlabel(vnd, (struct disklabel *)data);
		break;

#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDEFLABEL:
		vndgetdefaultlabel(vnd, &newlabel);
		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
			return ENOTTY;
		memcpy(data, &newlabel, sizeof (struct olddisklabel));
		break;
#endif

	case DIOCGSTRATEGY:
	    {
		struct disk_strategy *dks = (void *)data;

		/* No lock needed, never changed */
		strlcpy(dks->dks_name,
		    bufq_getstrategyname(vnd->sc_tab),
		    sizeof(dks->dks_name));
		dks->dks_paramlen = 0;
		break;
	    }
	case DIOCGCACHE:
	    {
		int *bits = (int *)data;
		*bits |= DKCACHE_READ | DKCACHE_WRITE;
		break;
	    }
	case DIOCCACHESYNC:
		vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY);
		error = VOP_FSYNC(vnd->sc_vp, vnd->sc_cred,
		    FSYNC_WAIT | FSYNC_DATAONLY | FSYNC_CACHE, 0, 0);
		VOP_UNLOCK(vnd->sc_vp);
		return error;

	default:
		return ENOTTY;
	}

	return 0;
}

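/*
 * Illustrative userland sketch (assumed usage, not part of this driver):
 * configuring a unit boils down to a VNDIOCSET ioctl on the raw partition
 * of the vnd device, which vnconfig(8) wraps:
 *
 *	struct vnd_ioctl vio;
 *	memset(&vio, 0, sizeof(vio));
 *	vio.vnd_file = "/var/tmp/disk.img";	(hypothetical image path)
 *	if (ioctl(fd, VNDIOCSET, &vio) == -1)
 *		err(1, "VNDIOCSET");
 *
 * where fd is an open descriptor for e.g. /dev/rvnd0d.
 */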
/*
 * Duplicate the current process's credentials.  Since we are called only
 * as the result of a SET ioctl and only root can do that, any future access
 * to this "disk" is essentially as root.  Note that credentials may change
 * if some other uid can write directly to the mapped file (NFS).
 */
static int
vndsetcred(struct vnd_softc *vnd, kauth_cred_t cred)
{
	struct uio auio;
	struct iovec aiov;
	char *tmpbuf;
	int error;

	vnd->sc_cred = kauth_cred_dup(cred);
	tmpbuf = malloc(DEV_BSIZE, M_TEMP, M_WAITOK);

	/* XXX: Horrible kludge to establish credentials for NFS */
	aiov.iov_base = tmpbuf;
	aiov.iov_len = uimin(DEV_BSIZE, dbtob(vnd->sc_size));
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = 0;
	auio.uio_rw = UIO_READ;
	auio.uio_resid = aiov.iov_len;
	UIO_SETUP_SYSSPACE(&auio);
	vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY);
	error = VOP_READ(vnd->sc_vp, &auio, 0, vnd->sc_cred);
	if (error == 0) {
		/*
		 * Because vnd does all IO directly through the vnode
		 * we need to flush (at least) the buffer from the above
		 * VOP_READ from the buffer cache to prevent cache
		 * incoherencies.  Also, be careful to write dirty
		 * buffers back to stable storage.
		 */
		error = vinvalbuf(vnd->sc_vp, V_SAVE, vnd->sc_cred,
		    curlwp, 0, 0);
	}
	VOP_UNLOCK(vnd->sc_vp);

	free(tmpbuf, M_TEMP);
	return error;
}

/*
 * Set maxactive based on FS type
 */
static void
vndthrottle(struct vnd_softc *vnd, struct vnode *vp)
{

	if (vp->v_tag == VT_NFS)
		vnd->sc_maxactive = 2;
	else
		vnd->sc_maxactive = 8;

	if (vnd->sc_maxactive < 1)
		vnd->sc_maxactive = 1;
}

#if 0
static void
vndshutdown(void)
{
	struct vnd_softc *vnd;

	for (vnd = &vnd_softc[0]; vnd < &vnd_softc[numvnd]; vnd++)
		if (vnd->sc_flags & VNF_INITED)
			vndclear(vnd);
}
#endif

static void
vndclear(struct vnd_softc *vnd, int myminor)
{
	struct vnode *vp = vnd->sc_vp;
	int fflags = FREAD;
	int bmaj, cmaj, i, mn;
	int s;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndclear(%p): vp %p\n", vnd, vp);
#endif
	/* locate the major number */
	bmaj = bdevsw_lookup_major(&vnd_bdevsw);
	cmaj = cdevsw_lookup_major(&vnd_cdevsw);

	/* Nuke the vnodes for any open instances */
	for (i = 0; i < MAXPARTITIONS; i++) {
		mn = DISKMINOR(device_unit(vnd->sc_dev), i);
		vdevgone(bmaj, mn, mn, VBLK);
		if (mn != myminor) /* XXX avoid killing our own vnode */
			vdevgone(cmaj, mn, mn, VCHR);
	}

	if ((vnd->sc_flags & VNF_READONLY) == 0)
		fflags |= FWRITE;

	s = splbio();
	bufq_drain(vnd->sc_tab);
	splx(s);

	vnd->sc_flags |= VNF_VUNCONF;
	wakeup(&vnd->sc_tab);
	while (vnd->sc_flags & VNF_KTHREAD)
		tsleep(&vnd->sc_kthread, PRIBIO, "vnthr", 0);

#ifdef VND_COMPRESSION
	/* free the compressed file buffers */
	if (vnd->sc_flags & VNF_COMP) {
		if (vnd->sc_comp_offsets) {
			free(vnd->sc_comp_offsets, M_DEVBUF);
			vnd->sc_comp_offsets = NULL;
		}
		if (vnd->sc_comp_buff) {
			free(vnd->sc_comp_buff, M_DEVBUF);
			vnd->sc_comp_buff = NULL;
		}
		if (vnd->sc_comp_decombuf) {
			free(vnd->sc_comp_decombuf, M_DEVBUF);
			vnd->sc_comp_decombuf = NULL;
		}
	}
#endif /* VND_COMPRESSION */
	vnd->sc_flags &=
	    ~(VNF_INITED | VNF_READONLY | VNF_KLABEL | VNF_VLABEL
	      | VNF_VUNCONF | VNF_COMP | VNF_CLEARING);
	if (vp == NULL)
		panic("vndclear: null vp");
	(void) vn_close(vp, fflags, vnd->sc_cred);
	kauth_cred_free(vnd->sc_cred);
	vnd->sc_vp = NULL;
	vnd->sc_cred = NULL;
	vnd->sc_size = 0;
}

static int
vndsize(dev_t dev)
{
	struct vnd_softc *sc;
	struct disklabel *lp;
	int part, unit, omask;
	int size;

	unit = vndunit(dev);
	sc = device_lookup_private(&vnd_cd, unit);
	if (sc == NULL)
		return -1;

	if ((sc->sc_flags & VNF_INITED) == 0)
		return -1;

	part = DISKPART(dev);
	omask = sc->sc_dkdev.dk_openmask & (1 << part);
	lp = sc->sc_dkdev.dk_label;

	if (omask == 0 && vndopen(dev, 0, S_IFBLK, curlwp))	/* XXX */
		return -1;

	if (lp->d_partitions[part].p_fstype != FS_SWAP)
		size = -1;
	else
		size = lp->d_partitions[part].p_size *
		    (lp->d_secsize / DEV_BSIZE);

	if (omask == 0 && vndclose(dev, 0, S_IFBLK, curlwp))	/* XXX */
		return -1;

	return size;
}

static int
vnddump(dev_t dev, daddr_t blkno, void *va, size_t size)
{

	/* Not implemented. */
	return ENXIO;
}

static void
vndgetdefaultlabel(struct vnd_softc *sc, struct disklabel *lp)
{
	struct vndgeom *vng = &sc->sc_geom;
	struct partition *pp;
	unsigned spb;

	memset(lp, 0, sizeof(*lp));

	spb = vng->vng_secsize / DEV_BSIZE;
	if (sc->sc_size / spb > UINT32_MAX)
		lp->d_secperunit = UINT32_MAX;
	else
		lp->d_secperunit = sc->sc_size / spb;
	lp->d_secsize = vng->vng_secsize;
	lp->d_nsectors = vng->vng_nsectors;
	lp->d_ntracks = vng->vng_ntracks;
	lp->d_ncylinders = vng->vng_ncylinders;
	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;

	strncpy(lp->d_typename, "vnd", sizeof(lp->d_typename));
	lp->d_type = DKTYPE_VND;
	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
	lp->d_rpm = 3600;
	lp->d_interleave = 1;
	lp->d_flags = 0;

	pp = &lp->d_partitions[RAW_PART];
	pp->p_offset = 0;
	pp->p_size = lp->d_secperunit;
	pp->p_fstype = FS_UNUSED;
	lp->d_npartitions = RAW_PART + 1;

	lp->d_magic = DISKMAGIC;
	lp->d_magic2 = DISKMAGIC;
	lp->d_checksum = dkcksum(lp);
}

/*
 * Read the disklabel from a vnd.  If one is not present, create a fake one.
 */
static void
vndgetdisklabel(dev_t dev, struct vnd_softc *sc)
{
	const char *errstring;
	struct disklabel *lp = sc->sc_dkdev.dk_label;
	struct cpu_disklabel *clp = sc->sc_dkdev.dk_cpulabel;
	int i;

	memset(clp, 0, sizeof(*clp));

	vndgetdefaultlabel(sc, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(VNDLABELDEV(dev), vndstrategy, lp, clp);
	if (errstring) {
		/*
		 * Lack of disklabel is common, but we print the warning
		 * anyway, since it might contain other useful information.
		 */
		aprint_normal_dev(sc->sc_dev, "%s\n", errstring);

		/*
		 * For historical reasons, if there's no disklabel
		 * present, all partitions must be FS_BSDFFS and
		 * occupy the entire disk.
		 */
		for (i = 0; i < MAXPARTITIONS; i++) {
			/*
			 * Don't wipe out port-specific hacks (such as
			 * the dos partition hack of the i386 port).
			 */
			if (lp->d_partitions[i].p_size != 0)
				continue;

			lp->d_partitions[i].p_size = lp->d_secperunit;
			lp->d_partitions[i].p_offset = 0;
			lp->d_partitions[i].p_fstype = FS_BSDFFS;
		}

		strncpy(lp->d_packname, "default label",
		    sizeof(lp->d_packname));

		lp->d_npartitions = MAXPARTITIONS;
		lp->d_checksum = dkcksum(lp);
	}
}

/*
 * Wait interruptibly for an exclusive lock.
 *
 * XXX
 * Several drivers do this; it should be abstracted and made MP-safe.
 */
static int
vndlock(struct vnd_softc *sc)
{
	int error;

	while ((sc->sc_flags & VNF_LOCKED) != 0) {
		sc->sc_flags |= VNF_WANTED;
		if ((error = tsleep(sc, PRIBIO | PCATCH, "vndlck", 0)) != 0)
			return error;
	}
	sc->sc_flags |= VNF_LOCKED;
	return 0;
}

/*
 * Unlock and wake up any waiters.
 */
static void
vndunlock(struct vnd_softc *sc)
{

	sc->sc_flags &= ~VNF_LOCKED;
	if ((sc->sc_flags & VNF_WANTED) != 0) {
		sc->sc_flags &= ~VNF_WANTED;
		wakeup(sc);
	}
}

#ifdef VND_COMPRESSION
/* compressed file read */
static void
compstrategy(struct buf *bp, off_t bn)
{
	int error;
	int unit = vndunit(bp->b_dev);
	struct vnd_softc *vnd =
	    device_lookup_private(&vnd_cd, unit);
	u_int32_t comp_block;
	struct uio auio;
	char *addr;
	int s;

	/* set up constants for data move */
	auio.uio_rw = UIO_READ;
	UIO_SETUP_SYSSPACE(&auio);

	/* read, and transfer the data */
	addr = bp->b_data;
	bp->b_resid = bp->b_bcount;
	s = splbio();
	while (bp->b_resid > 0) {
		unsigned length;
		size_t length_in_buffer;
		u_int32_t offset_in_buffer;
		struct iovec aiov;

		/* calculate the compressed block number */
		comp_block = bn / (off_t)vnd->sc_comp_blksz;

		/* check for good block number */
		if (comp_block >= vnd->sc_comp_numoffs) {
			bp->b_error = EINVAL;
			splx(s);
			return;
		}

		/* read in the compressed block, if not in buffer */
		if (comp_block != vnd->sc_comp_buffblk) {
			length = vnd->sc_comp_offsets[comp_block + 1] -
			    vnd->sc_comp_offsets[comp_block];
			vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY);
			error = vn_rdwr(UIO_READ, vnd->sc_vp,
			    vnd->sc_comp_buff, length,
			    vnd->sc_comp_offsets[comp_block],
			    UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, vnd->sc_cred,
			    NULL, NULL);
			if (error) {
				bp->b_error = error;
				VOP_UNLOCK(vnd->sc_vp);
				splx(s);
				return;
			}
			/* uncompress the buffer */
			vnd->sc_comp_stream.next_in = vnd->sc_comp_buff;
			vnd->sc_comp_stream.avail_in = length;
			vnd->sc_comp_stream.next_out = vnd->sc_comp_decombuf;
			vnd->sc_comp_stream.avail_out = vnd->sc_comp_blksz;
			inflateReset(&vnd->sc_comp_stream);
			error = inflate(&vnd->sc_comp_stream, Z_FINISH);
			if (error != Z_STREAM_END) {
				if (vnd->sc_comp_stream.msg)
					aprint_normal_dev(vnd->sc_dev,
					    "compressed file, %s\n",
					    vnd->sc_comp_stream.msg);
				bp->b_error = EBADMSG;
				VOP_UNLOCK(vnd->sc_vp);
				splx(s);
				return;
			}
			vnd->sc_comp_buffblk = comp_block;
			VOP_UNLOCK(vnd->sc_vp);
		}

		/* transfer the usable uncompressed data */
		offset_in_buffer = bn % (off_t)vnd->sc_comp_blksz;
		length_in_buffer = vnd->sc_comp_blksz - offset_in_buffer;
		if (length_in_buffer > bp->b_resid)
			length_in_buffer = bp->b_resid;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		aiov.iov_base = addr;
		aiov.iov_len = length_in_buffer;
		auio.uio_resid = aiov.iov_len;
		auio.uio_offset = 0;
		error = uiomove(vnd->sc_comp_decombuf + offset_in_buffer,
		    length_in_buffer, &auio);
		if (error) {
			bp->b_error = error;
			splx(s);
			return;
		}

		bn += length_in_buffer;
		addr += length_in_buffer;
		bp->b_resid -= length_in_buffer;
	}
	splx(s);
}

/* compression memory allocation routines */
static void *
vnd_alloc(void *aux, u_int items, u_int siz)
{
	return malloc(items * siz, M_TEMP, M_NOWAIT);
}

static void
vnd_free(void *aux, void *ptr)
{
	free(ptr, M_TEMP);
}
#endif /* VND_COMPRESSION */

static void
vnd_set_geometry(struct vnd_softc *vnd)
{
	struct disk_geom *dg = &vnd->sc_dkdev.dk_geom;
	unsigned spb;

	memset(dg, 0, sizeof(*dg));

	spb = vnd->sc_geom.vng_secsize / DEV_BSIZE;
	dg->dg_secperunit = vnd->sc_size / spb;
	dg->dg_secsize = vnd->sc_geom.vng_secsize;
	dg->dg_nsectors = vnd->sc_geom.vng_nsectors;
	dg->dg_ntracks = vnd->sc_geom.vng_ntracks;
	dg->dg_ncylinders = vnd->sc_geom.vng_ncylinders;

#ifdef DEBUG
	if (vnddebug & VDB_LABEL) {
		printf("dg->dg_secperunit: %" PRId64 "\n", dg->dg_secperunit);
		printf("dg->dg_ncylinders: %u\n", dg->dg_ncylinders);
	}
#endif
	disk_set_info(vnd->sc_dev, &vnd->sc_dkdev, NULL);
}

#ifdef VND_COMPRESSION
#define	VND_DEPENDS "zlib"
#else
#define	VND_DEPENDS NULL
#endif

MODULE(MODULE_CLASS_DRIVER, vnd, VND_DEPENDS);

#ifdef _MODULE
int vnd_bmajor = -1, vnd_cmajor = -1;

CFDRIVER_DECL(vnd, DV_DISK, NULL);
#endif

static int
vnd_modcmd(modcmd_t cmd, void *arg)
{
	int error = 0;

	switch (cmd) {
	case MODULE_CMD_INIT:
#ifdef _MODULE
		error = config_cfdriver_attach(&vnd_cd);
		if (error)
			break;

		error = config_cfattach_attach(vnd_cd.cd_name, &vnd_ca);
		if (error) {
			config_cfdriver_detach(&vnd_cd);
#ifdef DIAGNOSTIC
			aprint_error("%s: unable to register cfattach for "
			    "%s, error %d\n", __func__, vnd_cd.cd_name,
			    error);
#endif
			break;
		}

		/*
		 * Attach the {b,c}devsw's
		 */
		error = devsw_attach("vnd", &vnd_bdevsw, &vnd_bmajor,
		    &vnd_cdevsw, &vnd_cmajor);
		/*
		 * If devsw_attach fails, remove from autoconf database
		 */
		if (error) {
			config_cfattach_detach(vnd_cd.cd_name, &vnd_ca);
			config_cfdriver_detach(&vnd_cd);
#ifdef DIAGNOSTIC
			aprint_error("%s: unable to attach %s devsw, "
			    "error %d\n", __func__, vnd_cd.cd_name, error);
#endif
			break;
		}
#endif
		break;

	case MODULE_CMD_FINI:
#ifdef _MODULE
		/*
		 * Remove {b,c}devsw's
		 */
		devsw_detach(&vnd_bdevsw, &vnd_cdevsw);

		/*
		 * Now remove device from autoconf database
		 */
		error = config_cfattach_detach(vnd_cd.cd_name, &vnd_ca);
		if (error) {
			(void)devsw_attach("vnd", &vnd_bdevsw, &vnd_bmajor,
			    &vnd_cdevsw, &vnd_cmajor);
#ifdef DIAGNOSTIC
			aprint_error("%s: failed to detach %s cfattach, "
			    "error %d\n", __func__, vnd_cd.cd_name, error);
#endif
			break;
		}
		error = config_cfdriver_detach(&vnd_cd);
		if (error) {
			(void)config_cfattach_attach(vnd_cd.cd_name,
			    &vnd_ca);
			(void)devsw_attach("vnd", &vnd_bdevsw, &vnd_bmajor,
			    &vnd_cdevsw, &vnd_cmajor);
#ifdef DIAGNOSTIC
			aprint_error("%s: failed to detach %s cfdriver, "
			    "error %d\n", __func__, vnd_cd.cd_name, error);
#endif
			break;
		}
#endif
		break;

	case MODULE_CMD_STAT:
		return ENOTTY;

	default:
		return ENOTTY;
	}

	return error;
}