/*	$NetBSD: vnd.c,v 1.187 2008/09/24 07:57:30 ad Exp $	*/

/*-
 * Copyright (c) 1996, 1997, 1998, 2008 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vn.c 1.13 94/04/02$
 *
 *	@(#)vn.c	8.9 (Berkeley) 5/14/95
 */

/*
 * Copyright (c) 1988 University of Utah.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vn.c 1.13 94/04/02$
 *
 *	@(#)vn.c	8.9 (Berkeley) 5/14/95
 */

/*
 * Vnode disk driver.
 *
 * Block/character interface to a vnode.  Allows one to treat a file
 * as a disk (e.g. build a filesystem in it, mount it, etc.).
 *
 * NOTE 1: If the vnode supports the VOP_BMAP and VOP_STRATEGY operations,
 * this uses them to avoid distorting the local buffer cache.  If those
 * block-level operations are not available, this falls back to the regular
 * read and write calls.  Using these may distort the cache in some cases,
 * but it is better to have the driver working than to prevent it from
 * working on file systems where the block-level operations are not
 * implemented for whatever reason.
 *
 * NOTE 2: There is a security issue involved with this driver.
 * Once mounted all access to the contents of the "mapped" file via
 * the special file is controlled by the permissions on the special
 * file; the protection of the mapped file is ignored (effectively,
 * by using root credentials in all transactions).
 *
 * NOTE 3: Doesn't interact with leases, should it?
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vnd.c,v 1.187 2008/09/24 07:57:30 ad Exp $");

#if defined(_KERNEL_OPT)
#include "fs_nfs.h"
#include "opt_vnd.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/proc.h>
#include <sys/kthread.h>
#include <sys/errno.h>
#include <sys/buf.h>
#include <sys/bufq.h>
#include <sys/malloc.h>
#include <sys/ioctl.h>
#include <sys/disklabel.h>
#include <sys/device.h>
#include <sys/disk.h>
#include <sys/stat.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/uio.h>
#include <sys/conf.h>
#include <sys/kauth.h>

#include <net/zlib.h>

#include <miscfs/genfs/genfs.h>
#include <miscfs/specfs/specdev.h>

#include <dev/vndvar.h>

#include <prop/proplib.h>

#if defined(VNDDEBUG) && !defined(DEBUG)
#define DEBUG
#endif

#ifdef DEBUG
int dovndcluster = 1;
#define VDB_FOLLOW	0x01
#define VDB_INIT	0x02
#define VDB_IO		0x04
#define VDB_LABEL	0x08
int vnddebug = 0x00;
#endif

#define vndunit(x)	DISKUNIT(x)

struct vndxfer {
	struct buf vx_buf;
	struct vnd_softc *vx_vnd;
};
#define VND_BUFTOXFER(bp) ((struct vndxfer *)(void *)bp)

#define VND_GETXFER(vnd) pool_get(&(vnd)->sc_vxpool, PR_WAITOK)
#define VND_PUTXFER(vnd, vx) pool_put(&(vnd)->sc_vxpool, (vx))

#define VNDLABELDEV(dev) \
    (MAKEDISKDEV(major((dev)), vndunit((dev)), RAW_PART))

/* called by main() at boot time (XXX: and the LKM driver) */
void	vndattach(int);

static void	vndclear(struct vnd_softc *, int);
static int	vndsetcred(struct vnd_softc *, kauth_cred_t);
static void	vndthrottle(struct vnd_softc *, struct vnode *);
static void	vndiodone(struct buf *);
#if 0
static void	vndshutdown(void);
#endif

static void	vndgetdefaultlabel(struct vnd_softc *, struct disklabel *);
static void	vndgetdisklabel(dev_t, struct vnd_softc *);

static int	vndlock(struct vnd_softc *);
static void	vndunlock(struct vnd_softc *);
#ifdef VND_COMPRESSION
static void	compstrategy(struct buf *, off_t);
static void	*vnd_alloc(void *, u_int, u_int);
static void	vnd_free(void *, void *);
#endif /* VND_COMPRESSION */

static void	vndthread(void *);
static bool	vnode_has_op(const struct vnode *, int);
static void	handle_with_rdwr(struct vnd_softc *, const struct buf *,
		    struct buf *);
static void	handle_with_strategy(struct vnd_softc *, const struct buf *,
		    struct buf *);
static void	vnd_set_properties(struct vnd_softc *);

static dev_type_open(vndopen);
static dev_type_close(vndclose);
static dev_type_read(vndread);
static dev_type_write(vndwrite);
static dev_type_ioctl(vndioctl);
static dev_type_strategy(vndstrategy);
static dev_type_dump(vnddump);
static dev_type_size(vndsize);

const struct bdevsw vnd_bdevsw = {
	vndopen, vndclose, vndstrategy, vndioctl, vnddump, vndsize, D_DISK
};

const struct cdevsw vnd_cdevsw = {
	vndopen, vndclose, vndread, vndwrite, vndioctl,
	nostop, notty, nopoll, nommap, nokqfilter, D_DISK
};

static int	vnd_match(device_t, cfdata_t, void *);
static void	vnd_attach(device_t, device_t, void *);
static int	vnd_detach(device_t, int);

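/*
 * Autoconfiguration glue.  vnd is a pseudo-device: instances are not
 * found by bus probes but created on demand via config_attach_pseudo()
 * (see vnd_spawn()) and torn down again with vnd_destroy().
 */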
CFATTACH_DECL_NEW(vnd, sizeof(struct vnd_softc),
    vnd_match, vnd_attach, vnd_detach, NULL);
extern struct cfdriver vnd_cd;

static struct vnd_softc	*vnd_spawn(int);
int	vnd_destroy(device_t);

void
vndattach(int num)
{
	int error;

	error = config_cfattach_attach(vnd_cd.cd_name, &vnd_ca);
	if (error)
		aprint_error("%s: unable to register cfattach\n",
		    vnd_cd.cd_name);
}

static int
vnd_match(device_t self, cfdata_t cfdata, void *aux)
{

	return 1;
}

static void
vnd_attach(device_t parent, device_t self, void *aux)
{
	struct vnd_softc *sc = device_private(self);

	sc->sc_dev = self;
	sc->sc_comp_offsets = NULL;
	sc->sc_comp_buff = NULL;
	sc->sc_comp_decombuf = NULL;
	bufq_alloc(&sc->sc_tab, "disksort", BUFQ_SORT_RAWBLOCK);
	disk_init(&sc->sc_dkdev, device_xname(self), NULL);
	if (!pmf_device_register(self, NULL, NULL))
		aprint_error_dev(self, "couldn't establish power handler\n");
}

static int
vnd_detach(device_t self, int flags)
{
	struct vnd_softc *sc = device_private(self);
	if (sc->sc_flags & VNF_INITED)
		return EBUSY;

	pmf_device_deregister(self);
	bufq_free(sc->sc_tab);
	disk_destroy(&sc->sc_dkdev);

	return 0;
}

static struct vnd_softc *
vnd_spawn(int unit)
{
	struct cfdata *cf;

	cf = malloc(sizeof(*cf), M_DEVBUF, M_WAITOK);
	cf->cf_name = vnd_cd.cd_name;
	cf->cf_atname = vnd_cd.cd_name;
	cf->cf_unit = unit;
	cf->cf_fstate = FSTATE_STAR;

	return device_private(config_attach_pseudo(cf));
}

int
vnd_destroy(device_t dev)
{
	int error;
	cfdata_t cf;

	cf = device_cfdata(dev);
	error = config_detach(dev, DETACH_QUIET);
	if (error)
		return error;
	free(cf, M_DEVBUF);
	return 0;
}

static int
vndopen(dev_t dev, int flags, int mode, struct lwp *l)
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;
	int error = 0, part, pmask;
	struct disklabel *lp;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndopen(0x%x, 0x%x, 0x%x, %p)\n", dev, flags, mode, l);
#endif
	sc = device_lookup_private(&vnd_cd, unit);
	if (sc == NULL) {
		sc = vnd_spawn(unit);
		if (sc == NULL)
			return ENOMEM;
	}

	if ((error = vndlock(sc)) != 0)
		return (error);

	lp = sc->sc_dkdev.dk_label;

	part = DISKPART(dev);
	pmask = (1 << part);

	/*
	 * If we're initialized, check to see if there are any other
	 * open partitions.  If not, then it's safe to update the
	 * in-core disklabel.  Only read the disklabel if it is
	 * not already valid.
	 */
	if ((sc->sc_flags & (VNF_INITED|VNF_VLABEL)) == VNF_INITED &&
	    sc->sc_dkdev.dk_openmask == 0)
		vndgetdisklabel(dev, sc);

	/* Check that the partition exists. */
	if (part != RAW_PART) {
		if (((sc->sc_flags & VNF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
		     (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			goto done;
		}
	}

	/* Prevent our unit from being unconfigured while open. */
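	/*
	 * Record the open separately for the character and block device;
	 * the union of the two masks (dk_openmask) is what VNDIOCCLR
	 * consults before allowing the unit to be unconfigured.
	 */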
	switch (mode) {
	case S_IFCHR:
		sc->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		sc->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}
	sc->sc_dkdev.dk_openmask =
	    sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask;

done:
	vndunlock(sc);
	return (error);
}

static int
vndclose(dev_t dev, int flags, int mode, struct lwp *l)
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;
	int error = 0, part;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndclose(0x%x, 0x%x, 0x%x, %p)\n", dev, flags, mode, l);
#endif
	sc = device_lookup_private(&vnd_cd, unit);
	if (sc == NULL)
		return ENXIO;

	if ((error = vndlock(sc)) != 0)
		return (error);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (mode) {
	case S_IFCHR:
		sc->sc_dkdev.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		sc->sc_dkdev.dk_bopenmask &= ~(1 << part);
		break;
	}
	sc->sc_dkdev.dk_openmask =
	    sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask;

	vndunlock(sc);

	if ((sc->sc_flags & VNF_INITED) == 0) {
		if ((error = vnd_destroy(sc->sc_dev)) != 0) {
			aprint_error_dev(sc->sc_dev,
			    "unable to detach instance\n");
			return error;
		}
	}

	return (0);
}

/*
 * Queue the request, and wakeup the kernel thread to handle it.
 */
static void
vndstrategy(struct buf *bp)
{
	int unit = vndunit(bp->b_dev);
	struct vnd_softc *vnd =
	    device_lookup_private(&vnd_cd, unit);
	struct disklabel *lp = vnd->sc_dkdev.dk_label;
	daddr_t blkno;
	int s = splbio();

	if ((vnd->sc_flags & VNF_INITED) == 0) {
		bp->b_error = ENXIO;
		goto done;
	}

	/*
	 * The transfer must be a whole number of blocks.
	 */
	if ((bp->b_bcount % lp->d_secsize) != 0) {
		bp->b_error = EINVAL;
		goto done;
	}

	/*
	 * check if we're read-only.
	 */
	if ((vnd->sc_flags & VNF_READONLY) && !(bp->b_flags & B_READ)) {
		bp->b_error = EACCES;
		goto done;
	}

	/* If it's a nil transfer, wake up the top half now. */
	if (bp->b_bcount == 0) {
		goto done;
	}

	/*
	 * Do bounds checking and adjust transfer.  If there's an error,
	 * the bounds check will flag that for us.
	 */
	if (DISKPART(bp->b_dev) == RAW_PART) {
		if (bounds_check_with_mediasize(bp, DEV_BSIZE,
		    vnd->sc_size) <= 0)
			goto done;
	} else {
		if (bounds_check_with_label(&vnd->sc_dkdev,
		    bp, vnd->sc_flags & (VNF_WLABEL|VNF_LABELLING)) <= 0)
			goto done;
	}

	/*
	 * Put the block number in terms of the logical blocksize
	 * of the "device".
	 */

	blkno = bp->b_blkno / (lp->d_secsize / DEV_BSIZE);

	/*
	 * Translate the partition-relative block number to an absolute.
	 */
	if (DISKPART(bp->b_dev) != RAW_PART) {
		struct partition *pp;

		pp = &vnd->sc_dkdev.dk_label->d_partitions[
		    DISKPART(bp->b_dev)];
		blkno += pp->p_offset;
	}
	bp->b_rawblkno = blkno;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndstrategy(%p): unit %d\n", bp, unit);
#endif
	BUFQ_PUT(vnd->sc_tab, bp);
	wakeup(&vnd->sc_tab);
	splx(s);
	return;

done:
	bp->b_resid = bp->b_bcount;
	biodone(bp);
	splx(s);
}

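/*
 * Returns true if the backing vnode implements both VOP_BMAP and
 * VOP_STRATEGY, i.e. if requests can be handed to the file system at
 * the block level instead of going through VOP_READ/VOP_WRITE.
 */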
static bool
vnode_has_strategy(struct vnd_softc *vnd)
{
	return vnode_has_op(vnd->sc_vp, VOFFSET(vop_bmap)) &&
	    vnode_has_op(vnd->sc_vp, VOFFSET(vop_strategy));
}

/* XXX this function needs a reliable check to detect
 * sparse files.  Otherwise, bmap/strategy may be used
 * and fail on non-allocated blocks.  VOP_READ/VOP_WRITE
 * works on sparse files.
 */
#if notyet
static bool
vnode_strategy_probe(struct vnd_softc *vnd)
{
	int error;
	daddr_t nbn;

	if (!vnode_has_strategy(vnd))
		return false;

	/* Convert the first logical block number to its
	 * physical block number.
	 */
	error = 0;
	vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY | LK_CANRECURSE);
	error = VOP_BMAP(vnd->sc_vp, 0, NULL, &nbn, NULL);
	VOP_UNLOCK(vnd->sc_vp, 0);

	/* Test if that worked. */
	if (error == 0 && (long)nbn == -1)
		return false;

	return true;
}
#endif

static void
vndthread(void *arg)
{
	struct vnd_softc *vnd = arg;
	bool usestrategy;
	int s;

	/* Determine whether we can *use* VOP_BMAP and VOP_STRATEGY to
	 * directly access the backing vnode.  If we can, use these two
	 * operations to avoid messing with the local buffer cache.
	 * Otherwise fall back to regular VOP_READ/VOP_WRITE operations
	 * which are guaranteed to work with any file system. */
	usestrategy = vnode_has_strategy(vnd);

#ifdef DEBUG
	if (vnddebug & VDB_INIT)
		printf("vndthread: vp %p, %s\n", vnd->sc_vp,
		    usestrategy ?
		    "using bmap/strategy operations" :
		    "using read/write operations");
#endif

	s = splbio();
	vnd->sc_flags |= VNF_KTHREAD;
	wakeup(&vnd->sc_kthread);

	/*
	 * Dequeue requests and serve them depending on the available
	 * vnode operations.
	 */
	while ((vnd->sc_flags & VNF_VUNCONF) == 0) {
		struct vndxfer *vnx;
		int flags;
		struct buf *obp;
		struct buf *bp;

		obp = BUFQ_GET(vnd->sc_tab);
		if (obp == NULL) {
			tsleep(&vnd->sc_tab, PRIBIO, "vndbp", 0);
			continue;
		};
		splx(s);
		flags = obp->b_flags;
#ifdef DEBUG
		if (vnddebug & VDB_FOLLOW)
			printf("vndthread(%p)\n", obp);
#endif

		if (vnd->sc_vp->v_mount == NULL) {
			obp->b_error = ENXIO;
			goto done;
		}
#ifdef VND_COMPRESSION
		/* handle a compressed read */
		if ((flags & B_READ) != 0 && (vnd->sc_flags & VNF_COMP)) {
			off_t bn;

			/* Convert to a byte offset within the file. */
			bn = obp->b_rawblkno *
			    vnd->sc_dkdev.dk_label->d_secsize;

			compstrategy(obp, bn);
			goto done;
		}
#endif /* VND_COMPRESSION */

		/*
		 * Allocate a header for this transfer and link it to the
		 * buffer
		 */
		s = splbio();
		vnx = VND_GETXFER(vnd);
		splx(s);
		vnx->vx_vnd = vnd;

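		/*
		 * Throttle: don't fire off more than sc_maxactive transfers
		 * against the backing vnode at once (see vndthrottle());
		 * vndiodone() wakes us up once the outstanding ones drain.
		 */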
		s = splbio();
		while (vnd->sc_active >= vnd->sc_maxactive) {
			tsleep(&vnd->sc_tab, PRIBIO, "vndac", 0);
		}
		vnd->sc_active++;
		splx(s);

		/* Instrumentation. */
		disk_busy(&vnd->sc_dkdev);

		bp = &vnx->vx_buf;
		buf_init(bp);
		bp->b_flags = (obp->b_flags & B_READ);
		bp->b_oflags = obp->b_oflags;
		bp->b_cflags = obp->b_cflags;
		bp->b_iodone = vndiodone;
		bp->b_private = obp;
		bp->b_vp = vnd->sc_vp;
		bp->b_objlock = &bp->b_vp->v_interlock;
		bp->b_data = obp->b_data;
		bp->b_bcount = obp->b_bcount;
		BIO_COPYPRIO(bp, obp);

		/* Handle the request using the appropriate operations. */
		if (usestrategy)
			handle_with_strategy(vnd, obp, bp);
		else
			handle_with_rdwr(vnd, obp, bp);

		s = splbio();
		continue;

done:
		biodone(obp);
		s = splbio();
	}

	vnd->sc_flags &= (~VNF_KTHREAD | VNF_VUNCONF);
	wakeup(&vnd->sc_kthread);
	splx(s);
	kthread_exit(0);
}

/*
 * Checks if the given vnode supports the requested operation.
 * The operation is specified by the offset returned by VOFFSET.
 *
 * XXX The test below used to determine this is quite fragile
 * because it relies on the file system to use genfs to specify
 * unimplemented operations.  There might be another way to do
 * it more cleanly.
 */
static bool
vnode_has_op(const struct vnode *vp, int opoffset)
{
	int (*defaultp)(void *);
	int (*opp)(void *);

	defaultp = vp->v_op[VOFFSET(vop_default)];
	opp = vp->v_op[opoffset];

	return opp != defaultp && opp != genfs_eopnotsupp &&
	    opp != genfs_badop && opp != genfs_nullop;
}

/*
 * Handles the read/write request given in 'bp' using the vnode's VOP_READ
 * and VOP_WRITE operations.
 *
 * 'obp' is a pointer to the original request fed to the vnd device.
 */
static void
handle_with_rdwr(struct vnd_softc *vnd, const struct buf *obp, struct buf *bp)
{
	bool doread;
	off_t offset;
	size_t resid;
	struct vnode *vp;

	doread = bp->b_flags & B_READ;
	offset = obp->b_rawblkno * vnd->sc_dkdev.dk_label->d_secsize;
	vp = vnd->sc_vp;

#if defined(DEBUG)
	if (vnddebug & VDB_IO)
		printf("vnd (rdwr): vp %p, %s, rawblkno 0x%" PRIx64
		    ", secsize %d, offset %" PRIu64
		    ", bcount %d\n",
		    vp, doread ? "read" : "write", obp->b_rawblkno,
		    vnd->sc_dkdev.dk_label->d_secsize, offset,
		    bp->b_bcount);
#endif

	/* Issue the read or write operation. */
	bp->b_error =
	    vn_rdwr(doread ? UIO_READ : UIO_WRITE,
	    vp, bp->b_data, bp->b_bcount, offset,
	    UIO_SYSSPACE, 0, vnd->sc_cred, &resid, NULL);
	bp->b_resid = resid;

	/* We need to increase the number of outputs on the vnode if
	 * there was any write to it. */
	if (!doread) {
		mutex_enter(&vp->v_interlock);
		vp->v_numoutput++;
		mutex_exit(&vp->v_interlock);
	}

	biodone(bp);
}

/*
 * Handles the read/write request given in 'bp' using the vnode's VOP_BMAP
 * and VOP_STRATEGY operations.
 *
 * 'obp' is a pointer to the original request fed to the vnd device.
 */
static void
handle_with_strategy(struct vnd_softc *vnd, const struct buf *obp,
    struct buf *bp)
{
	int bsize, error, flags, skipped;
	size_t resid, sz;
	off_t bn, offset;
	struct vnode *vp;

	flags = obp->b_flags;

	if (!(flags & B_READ)) {
		vp = bp->b_vp;
		mutex_enter(&vp->v_interlock);
		vp->v_numoutput++;
		mutex_exit(&vp->v_interlock);
	}

	/* convert to a byte offset within the file. */
	bn = obp->b_rawblkno * vnd->sc_dkdev.dk_label->d_secsize;

	bsize = vnd->sc_vp->v_mount->mnt_stat.f_iosize;
	skipped = 0;

	/*
	 * Break the request into bsize pieces and feed them
	 * sequentially using VOP_BMAP/VOP_STRATEGY.
	 * We do it this way to keep from flooding NFS servers if we
	 * are connected to an NFS file.  This places the burden on
	 * the client rather than the server.
	 */
	error = 0;
	bp->b_resid = bp->b_bcount;
	for (offset = 0, resid = bp->b_resid; resid;
	    resid -= sz, offset += sz) {
		struct buf *nbp;
		daddr_t nbn;
		int off, nra;

		nra = 0;
		vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY | LK_CANRECURSE);
		error = VOP_BMAP(vnd->sc_vp, bn / bsize, &vp, &nbn, &nra);
		VOP_UNLOCK(vnd->sc_vp, 0);

		if (error == 0 && (long)nbn == -1)
			error = EIO;

		/*
		 * If there was an error or a hole in the file...punt.
		 * Note that we may have to wait for any operations
		 * that we have already fired off before releasing
		 * the buffer.
		 *
		 * XXX we could deal with holes here but it would be
		 * a hassle (in the write case).
		 */
		if (error) {
			skipped += resid;
			break;
		}

#ifdef DEBUG
		if (!dovndcluster)
			nra = 0;
#endif

		off = bn % bsize;
		sz = MIN(((off_t)1 + nra) * bsize - off, resid);
#ifdef DEBUG
		if (vnddebug & VDB_IO)
			printf("vndstrategy: vp %p/%p bn 0x%qx/0x%" PRIx64
			    " sz 0x%zx\n", vnd->sc_vp, vp, (long long)bn,
			    nbn, sz);
#endif

		nbp = getiobuf(vp, true);
		nestiobuf_setup(bp, nbp, offset, sz);
		nbp->b_blkno = nbn + btodb(off);

#if 0 /* XXX #ifdef DEBUG */
		if (vnddebug & VDB_IO)
			printf("vndstart(%ld): bp %p vp %p blkno "
			    "0x%" PRIx64 " flags %x addr %p cnt 0x%x\n",
			    (long) (vnd-vnd_softc), &nbp->vb_buf,
			    nbp->vb_buf.b_vp, nbp->vb_buf.b_blkno,
			    nbp->vb_buf.b_flags, nbp->vb_buf.b_data,
			    nbp->vb_buf.b_bcount);
#endif
		VOP_STRATEGY(vp, nbp);
		bn += sz;
	}
	nestiobuf_done(bp, skipped, error);
}

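/*
 * Completion handler for transfers issued by vndthread(): propagate the
 * error and residual count back to the original buffer, return the
 * transfer header to its pool, and wake up the worker if it is waiting
 * for the active count to drain.
 */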
static void
vndiodone(struct buf *bp)
{
	struct vndxfer *vnx = VND_BUFTOXFER(bp);
	struct vnd_softc *vnd = vnx->vx_vnd;
	struct buf *obp = bp->b_private;

	KASSERT(&vnx->vx_buf == bp);
	KASSERT(vnd->sc_active > 0);
#ifdef DEBUG
	if (vnddebug & VDB_IO) {
		printf("vndiodone1: bp %p iodone: error %d\n",
		    bp, bp->b_error);
	}
#endif
	disk_unbusy(&vnd->sc_dkdev, bp->b_bcount - bp->b_resid,
	    (bp->b_flags & B_READ));
	vnd->sc_active--;
	if (vnd->sc_active == 0) {
		wakeup(&vnd->sc_tab);
	}
	obp->b_error = bp->b_error;
	obp->b_resid = bp->b_resid;
	buf_destroy(bp);
	VND_PUTXFER(vnd, vnx);
	biodone(obp);
}

/* ARGSUSED */
static int
vndread(dev_t dev, struct uio *uio, int flags)
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndread(0x%x, %p)\n", dev, uio);
#endif

	sc = device_lookup_private(&vnd_cd, unit);
	if (sc == NULL)
		return ENXIO;

	if ((sc->sc_flags & VNF_INITED) == 0)
		return (ENXIO);

	return (physio(vndstrategy, NULL, dev, B_READ, minphys, uio));
}

/* ARGSUSED */
static int
vndwrite(dev_t dev, struct uio *uio, int flags)
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndwrite(0x%x, %p)\n", dev, uio);
#endif

	sc = device_lookup_private(&vnd_cd, unit);
	if (sc == NULL)
		return ENXIO;

	if ((sc->sc_flags & VNF_INITED) == 0)
		return (ENXIO);

	return (physio(vndstrategy, NULL, dev, B_WRITE, minphys, uio));
}

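/*
 * Helper for the VNDIOCGET (and compat VNDIOOCGET) ioctls: resolve the
 * requested unit (-1 means "the unit this ioctl arrived on") and return
 * the attributes of its backing vnode.  A return value of -1 flags an
 * unconfigured unit, which the callers report as "unused" rather than
 * as an error.
 */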
static int
vnd_cget(struct lwp *l, int unit, int *un, struct vattr *va)
{
	struct vnd_softc *vnd;

	if (*un == -1)
		*un = unit;
	if (*un < 0)
		return EINVAL;

	vnd = device_lookup_private(&vnd_cd, *un);
	if (vnd == NULL)
		return (*un >= vnd_cd.cd_ndevs) ? ENXIO : -1;

	if ((vnd->sc_flags & VNF_INITED) == 0)
		return -1;

	return VOP_GETATTR(vnd->sc_vp, va, l->l_cred);
}

/* ARGSUSED */
static int
vndioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
{
	int unit = vndunit(dev);
	struct vnd_softc *vnd;
	struct vnd_ioctl *vio;
	struct vattr vattr;
	struct nameidata nd;
	int error, part, pmask;
	size_t geomsize;
	int fflags;
#ifdef __HAVE_OLD_DISKLABEL
	struct disklabel newlabel;
#endif

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndioctl(0x%x, 0x%lx, %p, 0x%x, %p): unit %d\n",
		    dev, cmd, data, flag, l->l_proc, unit);
#endif
	vnd = device_lookup_private(&vnd_cd, unit);
	if (vnd == NULL &&
#ifdef COMPAT_30
	    cmd != VNDIOOCGET &&
#endif
	    cmd != VNDIOCGET)
		return ENXIO;
	vio = (struct vnd_ioctl *)data;

	/* Must be open for writes for these commands... */
	switch (cmd) {
	case VNDIOCSET:
	case VNDIOCCLR:
	case DIOCSDINFO:
	case DIOCWDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCSDINFO:
	case ODIOCWDINFO:
#endif
	case DIOCKLABEL:
	case DIOCWLABEL:
		if ((flag & FWRITE) == 0)
			return (EBADF);
	}

	/* Must be initialized for these... */
	switch (cmd) {
	case VNDIOCCLR:
	case DIOCGDINFO:
	case DIOCSDINFO:
	case DIOCWDINFO:
	case DIOCGPART:
	case DIOCKLABEL:
	case DIOCWLABEL:
	case DIOCGDEFLABEL:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDINFO:
	case ODIOCSDINFO:
	case ODIOCWDINFO:
	case ODIOCGDEFLABEL:
#endif
		if ((vnd->sc_flags & VNF_INITED) == 0)
			return (ENXIO);
	}

	switch (cmd) {
	case VNDIOCSET:
		if (vnd->sc_flags & VNF_INITED)
			return (EBUSY);

		if ((error = vndlock(vnd)) != 0)
			return (error);

		fflags = FREAD;
		if ((vio->vnd_flags & VNDIOF_READONLY) == 0)
			fflags |= FWRITE;
		NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, vio->vnd_file);
		if ((error = vn_open(&nd, fflags, 0)) != 0)
			goto unlock_and_exit;
		KASSERT(l);
		error = VOP_GETATTR(nd.ni_vp, &vattr, l->l_cred);
		if (!error && nd.ni_vp->v_type != VREG)
			error = EOPNOTSUPP;
		if (error) {
			VOP_UNLOCK(nd.ni_vp, 0);
			goto close_and_exit;
		}

		/* If using a compressed file, initialize its info */
		/* (or abort with an error if kernel has no compression) */
		if (vio->vnd_flags & VNF_COMP) {
#ifdef VND_COMPRESSION
			struct vnd_comp_header *ch;
			int i;
			u_int32_t comp_size;
			u_int32_t comp_maxsize;

			/* allocate space for compressed file header */
			ch = malloc(sizeof(struct vnd_comp_header),
			    M_TEMP, M_WAITOK);

			/* read compressed file header */
			error = vn_rdwr(UIO_READ, nd.ni_vp, (void *)ch,
			    sizeof(struct vnd_comp_header), 0, UIO_SYSSPACE,
			    IO_UNIT|IO_NODELOCKED, l->l_cred, NULL, NULL);
			if(error) {
				free(ch, M_TEMP);
				VOP_UNLOCK(nd.ni_vp, 0);
				goto close_and_exit;
			}

			/* save some header info */
			vnd->sc_comp_blksz = ntohl(ch->block_size);
			/* note last offset is the file byte size */
			vnd->sc_comp_numoffs = ntohl(ch->num_blocks)+1;
			free(ch, M_TEMP);
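			/*
			 * Sanity-check the header: the block size must be a
			 * non-zero multiple of DEV_BSIZE and the offset
			 * table must fit inside the backing file.
			 */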
			if (vnd->sc_comp_blksz == 0 ||
			    vnd->sc_comp_blksz % DEV_BSIZE !=0) {
				VOP_UNLOCK(nd.ni_vp, 0);
				error = EINVAL;
				goto close_and_exit;
			}
			if(sizeof(struct vnd_comp_header) +
			    sizeof(u_int64_t) * vnd->sc_comp_numoffs >
			    vattr.va_size) {
				VOP_UNLOCK(nd.ni_vp, 0);
				error = EINVAL;
				goto close_and_exit;
			}

			/* set decompressed file size */
			vattr.va_size =
			    ((u_quad_t)vnd->sc_comp_numoffs - 1) *
			    (u_quad_t)vnd->sc_comp_blksz;

			/* allocate space for all the compressed offsets */
			vnd->sc_comp_offsets =
			    malloc(sizeof(u_int64_t) * vnd->sc_comp_numoffs,
			    M_DEVBUF, M_WAITOK);

			/* read in the offsets */
			error = vn_rdwr(UIO_READ, nd.ni_vp,
			    (void *)vnd->sc_comp_offsets,
			    sizeof(u_int64_t) * vnd->sc_comp_numoffs,
			    sizeof(struct vnd_comp_header), UIO_SYSSPACE,
			    IO_UNIT|IO_NODELOCKED, l->l_cred, NULL, NULL);
			if(error) {
				VOP_UNLOCK(nd.ni_vp, 0);
				goto close_and_exit;
			}
			/*
			 * find largest block size (used for allocation limit).
			 * Also convert offset to native byte order.
			 */
			comp_maxsize = 0;
			for (i = 0; i < vnd->sc_comp_numoffs - 1; i++) {
				vnd->sc_comp_offsets[i] =
				    be64toh(vnd->sc_comp_offsets[i]);
				comp_size = be64toh(vnd->sc_comp_offsets[i + 1])
				    - vnd->sc_comp_offsets[i];
				if (comp_size > comp_maxsize)
					comp_maxsize = comp_size;
			}
			vnd->sc_comp_offsets[vnd->sc_comp_numoffs - 1] =
			    be64toh(vnd->sc_comp_offsets[vnd->sc_comp_numoffs - 1]);

			/* create compressed data buffer */
			vnd->sc_comp_buff = malloc(comp_maxsize,
			    M_DEVBUF, M_WAITOK);

			/* create decompressed buffer */
			vnd->sc_comp_decombuf = malloc(vnd->sc_comp_blksz,
			    M_DEVBUF, M_WAITOK);
			vnd->sc_comp_buffblk = -1;

			/* Initialize decompress stream */
			bzero(&vnd->sc_comp_stream, sizeof(z_stream));
			vnd->sc_comp_stream.zalloc = vnd_alloc;
			vnd->sc_comp_stream.zfree = vnd_free;
			error = inflateInit2(&vnd->sc_comp_stream, MAX_WBITS);
			if(error) {
				if(vnd->sc_comp_stream.msg)
					printf("vnd%d: compressed file, %s\n",
					    unit, vnd->sc_comp_stream.msg);
				VOP_UNLOCK(nd.ni_vp, 0);
				error = EINVAL;
				goto close_and_exit;
			}

			vnd->sc_flags |= VNF_COMP | VNF_READONLY;
#else /* !VND_COMPRESSION */
			VOP_UNLOCK(nd.ni_vp, 0);
			error = EOPNOTSUPP;
			goto close_and_exit;
#endif /* VND_COMPRESSION */
		}

		VOP_UNLOCK(nd.ni_vp, 0);
		vnd->sc_vp = nd.ni_vp;
		vnd->sc_size = btodb(vattr.va_size);	/* note truncation */

		/*
		 * Use pseudo-geometry specified.  If none was provided,
		 * use "standard" Adaptec fictitious geometry.
		 */
		if (vio->vnd_flags & VNDIOF_HASGEOM) {

			memcpy(&vnd->sc_geom, &vio->vnd_geom,
			    sizeof(vio->vnd_geom));

			/*
			 * Sanity-check the sector size.
			 * XXX Don't allow secsize < DEV_BSIZE.	 Should
			 * XXX we?
			 */
			if (vnd->sc_geom.vng_secsize < DEV_BSIZE ||
			    (vnd->sc_geom.vng_secsize % DEV_BSIZE) != 0 ||
			    vnd->sc_geom.vng_ncylinders == 0 ||
			    (vnd->sc_geom.vng_ntracks *
			     vnd->sc_geom.vng_nsectors) == 0) {
				error = EINVAL;
				goto close_and_exit;
			}

			/*
			 * Compute the size (in DEV_BSIZE blocks) specified
			 * by the geometry.
			 */
			geomsize = (vnd->sc_geom.vng_nsectors *
			    vnd->sc_geom.vng_ntracks *
			    vnd->sc_geom.vng_ncylinders) *
			    (vnd->sc_geom.vng_secsize / DEV_BSIZE);

			/*
			 * Sanity-check the size against the specified
			 * geometry.
			 */
			if (vnd->sc_size < geomsize) {
				error = EINVAL;
				goto close_and_exit;
			}
		} else if (vnd->sc_size >= (32 * 64)) {
			/*
			 * Size must be at least 2048 DEV_BSIZE blocks
			 * (1M) in order to use this geometry.
			 */
			vnd->sc_geom.vng_secsize = DEV_BSIZE;
			vnd->sc_geom.vng_nsectors = 32;
			vnd->sc_geom.vng_ntracks = 64;
			vnd->sc_geom.vng_ncylinders = vnd->sc_size / (64 * 32);
		} else {
			vnd->sc_geom.vng_secsize = DEV_BSIZE;
			vnd->sc_geom.vng_nsectors = 1;
			vnd->sc_geom.vng_ntracks = 1;
			vnd->sc_geom.vng_ncylinders = vnd->sc_size;
		}

		vnd_set_properties(vnd);

		if (vio->vnd_flags & VNDIOF_READONLY) {
			vnd->sc_flags |= VNF_READONLY;
		}

		if ((error = vndsetcred(vnd, l->l_cred)) != 0)
			goto close_and_exit;

		vndthrottle(vnd, vnd->sc_vp);
		vio->vnd_size = dbtob(vnd->sc_size);
		vnd->sc_flags |= VNF_INITED;

		/* create the kernel thread, wait for it to be up */
		error = kthread_create(PRI_NONE, 0, NULL, vndthread, vnd,
		    &vnd->sc_kthread, device_xname(vnd->sc_dev));
		if (error)
			goto close_and_exit;
		while ((vnd->sc_flags & VNF_KTHREAD) == 0) {
			tsleep(&vnd->sc_kthread, PRIBIO, "vndthr", 0);
		}
#ifdef DEBUG
		if (vnddebug & VDB_INIT)
			printf("vndioctl: SET vp %p size 0x%lx %d/%d/%d/%d\n",
			    vnd->sc_vp, (unsigned long) vnd->sc_size,
			    vnd->sc_geom.vng_secsize,
			    vnd->sc_geom.vng_nsectors,
			    vnd->sc_geom.vng_ntracks,
			    vnd->sc_geom.vng_ncylinders);
#endif

		/* Attach the disk. */
		disk_attach(&vnd->sc_dkdev);

		/* Initialize the xfer and buffer pools. */
		pool_init(&vnd->sc_vxpool, sizeof(struct vndxfer), 0,
		    0, 0, "vndxpl", NULL, IPL_BIO);

		/* Try and read the disklabel. */
		vndgetdisklabel(dev, vnd);

		vndunlock(vnd);

		break;

close_and_exit:
		(void) vn_close(nd.ni_vp, fflags, l->l_cred);
unlock_and_exit:
#ifdef VND_COMPRESSION
		/* free any allocated memory (for compressed file) */
		if(vnd->sc_comp_offsets) {
			free(vnd->sc_comp_offsets, M_DEVBUF);
			vnd->sc_comp_offsets = NULL;
		}
		if(vnd->sc_comp_buff) {
			free(vnd->sc_comp_buff, M_DEVBUF);
			vnd->sc_comp_buff = NULL;
		}
		if(vnd->sc_comp_decombuf) {
			free(vnd->sc_comp_decombuf, M_DEVBUF);
			vnd->sc_comp_decombuf = NULL;
		}
#endif /* VND_COMPRESSION */
		vndunlock(vnd);
		return (error);

	case VNDIOCCLR:
		if ((error = vndlock(vnd)) != 0)
			return (error);

		/*
		 * Don't unconfigure if any other partitions are open
		 * or if both the character and block flavors of this
		 * partition are open.
		 */
		part = DISKPART(dev);
		pmask = (1 << part);
		if (((vnd->sc_dkdev.dk_openmask & ~pmask) ||
		    ((vnd->sc_dkdev.dk_bopenmask & pmask) &&
		    (vnd->sc_dkdev.dk_copenmask & pmask))) &&
		    !(vio->vnd_flags & VNDIOF_FORCE)) {
			vndunlock(vnd);
			return (EBUSY);
		}

		/*
		 * XXX vndclear() might call vndclose() implicitly;
		 * release lock to avoid recursion
		 */
		vndunlock(vnd);
		vndclear(vnd, minor(dev));
#ifdef DEBUG
		if (vnddebug & VDB_INIT)
			printf("vndioctl: CLRed\n");
#endif

		/* Destroy the xfer and buffer pools. */
		pool_destroy(&vnd->sc_vxpool);

		/* Detach the disk. */
		disk_detach(&vnd->sc_dkdev);
		break;

#ifdef COMPAT_30
	case VNDIOOCGET: {
		struct vnd_ouser *vnu;
		struct vattr va;
		vnu = (struct vnd_ouser *)data;
		KASSERT(l);
		switch (error = vnd_cget(l, unit, &vnu->vnu_unit, &va)) {
		case 0:
			vnu->vnu_dev = va.va_fsid;
			vnu->vnu_ino = va.va_fileid;
			break;
		case -1:
			/* unused is not an error */
			vnu->vnu_dev = 0;
			vnu->vnu_ino = 0;
			break;
		default:
			return error;
		}
		break;
	}
#endif
	case VNDIOCGET: {
		struct vnd_user *vnu;
		struct vattr va;
		vnu = (struct vnd_user *)data;
		KASSERT(l);
		switch (error = vnd_cget(l, unit, &vnu->vnu_unit, &va)) {
		case 0:
			vnu->vnu_dev = va.va_fsid;
			vnu->vnu_ino = va.va_fileid;
			break;
		case -1:
			/* unused is not an error */
			vnu->vnu_dev = 0;
			vnu->vnu_ino = 0;
			break;
		default:
			return error;
		}
		break;
	}

	case DIOCGDINFO:
		*(struct disklabel *)data = *(vnd->sc_dkdev.dk_label);
		break;

#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDINFO:
		newlabel = *(vnd->sc_dkdev.dk_label);
		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
			return ENOTTY;
		memcpy(data, &newlabel, sizeof (struct olddisklabel));
		break;
#endif

	case DIOCGPART:
		((struct partinfo *)data)->disklab = vnd->sc_dkdev.dk_label;
		((struct partinfo *)data)->part =
		    &vnd->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
		break;

	case DIOCWDINFO:
	case DIOCSDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCWDINFO:
	case ODIOCSDINFO:
#endif
	{
		struct disklabel *lp;

		if ((error = vndlock(vnd)) != 0)
			return (error);

		vnd->sc_flags |= VNF_LABELLING;

#ifdef __HAVE_OLD_DISKLABEL
		if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
			memset(&newlabel, 0, sizeof newlabel);
			memcpy(&newlabel, data, sizeof (struct olddisklabel));
			lp = &newlabel;
		} else
#endif
		lp = (struct disklabel *)data;

		error = setdisklabel(vnd->sc_dkdev.dk_label,
		    lp, 0, vnd->sc_dkdev.dk_cpulabel);
		if (error == 0) {
			if (cmd == DIOCWDINFO
#ifdef __HAVE_OLD_DISKLABEL
			    || cmd == ODIOCWDINFO
#endif
			   )
				error = writedisklabel(VNDLABELDEV(dev),
				    vndstrategy, vnd->sc_dkdev.dk_label,
				    vnd->sc_dkdev.dk_cpulabel);
		}

		vnd->sc_flags &= ~VNF_LABELLING;

		vndunlock(vnd);

		if (error)
			return (error);
		break;
	}

	case DIOCKLABEL:
		if (*(int *)data != 0)
			vnd->sc_flags |= VNF_KLABEL;
		else
			vnd->sc_flags &= ~VNF_KLABEL;
		break;

	case DIOCWLABEL:
		if (*(int *)data != 0)
			vnd->sc_flags |= VNF_WLABEL;
		else
			vnd->sc_flags &= ~VNF_WLABEL;
		break;

	case DIOCGDEFLABEL:
		vndgetdefaultlabel(vnd, (struct disklabel *)data);
		break;

#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDEFLABEL:
		vndgetdefaultlabel(vnd, &newlabel);
		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
			return ENOTTY;
		memcpy(data, &newlabel, sizeof (struct olddisklabel));
		break;
#endif

	default:
		return (ENOTTY);
	}

	return (0);
}

/*
 * Duplicate the current process's credentials.  Since we are called only
 * as the result of a SET ioctl and only root can do that, any future access
 * to this "disk" is essentially as root.  Note that credentials may change
 * if some other uid can write directly to the mapped file (NFS).
 */
static int
vndsetcred(struct vnd_softc *vnd, kauth_cred_t cred)
{
	struct uio auio;
	struct iovec aiov;
	char *tmpbuf;
	int error;

	vnd->sc_cred = kauth_cred_dup(cred);
	tmpbuf = malloc(DEV_BSIZE, M_TEMP, M_WAITOK);

	/* XXX: Horrible kludge to establish credentials for NFS */
	aiov.iov_base = tmpbuf;
	aiov.iov_len = min(DEV_BSIZE, dbtob(vnd->sc_size));
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = 0;
	auio.uio_rw = UIO_READ;
	auio.uio_resid = aiov.iov_len;
	UIO_SETUP_SYSSPACE(&auio);
	vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY);
	error = VOP_READ(vnd->sc_vp, &auio, 0, vnd->sc_cred);
	if (error == 0) {
		/*
		 * Because vnd does all IO directly through the vnode
		 * we need to flush (at least) the buffer from the above
		 * VOP_READ from the buffer cache to prevent cache
		 * incoherencies.  Also, be careful to write dirty
		 * buffers back to stable storage.
		 */
		error = vinvalbuf(vnd->sc_vp, V_SAVE, vnd->sc_cred,
		    curlwp, 0, 0);
	}
	VOP_UNLOCK(vnd->sc_vp, 0);

	free(tmpbuf, M_TEMP);
	return (error);
}

/*
 * Set maxactive based on FS type
 */
static void
vndthrottle(struct vnd_softc *vnd, struct vnode *vp)
{
#ifdef NFS
	extern int (**nfsv2_vnodeop_p)(void *);

	if (vp->v_op == nfsv2_vnodeop_p)
		vnd->sc_maxactive = 2;
	else
#endif
		vnd->sc_maxactive = 8;

	if (vnd->sc_maxactive < 1)
		vnd->sc_maxactive = 1;
}

#if 0
static void
vndshutdown(void)
{
	struct vnd_softc *vnd;

	for (vnd = &vnd_softc[0]; vnd < &vnd_softc[numvnd]; vnd++)
		if (vnd->sc_flags & VNF_INITED)
			vndclear(vnd);
}
#endif

static void
vndclear(struct vnd_softc *vnd, int myminor)
{
	struct vnode *vp = vnd->sc_vp;
	int fflags = FREAD;
	int bmaj, cmaj, i, mn;
	int s;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndclear(%p): vp %p\n", vnd, vp);
#endif
	/* locate the major number */
	bmaj = bdevsw_lookup_major(&vnd_bdevsw);
	cmaj = cdevsw_lookup_major(&vnd_cdevsw);

	/* Nuke the vnodes for any open instances */
	for (i = 0; i < MAXPARTITIONS; i++) {
		mn = DISKMINOR(device_unit(vnd->sc_dev), i);
		vdevgone(bmaj, mn, mn, VBLK);
		if (mn != myminor) /* XXX avoid killing own vnode */
			vdevgone(cmaj, mn, mn, VCHR);
	}

	if ((vnd->sc_flags & VNF_READONLY) == 0)
		fflags |= FWRITE;

	s = splbio();
	bufq_drain(vnd->sc_tab);
	splx(s);

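	/*
	 * Tell the worker thread to shut down and wait for it to clear
	 * VNF_KTHREAD before freeing the state it may still be using.
	 */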
	vnd->sc_flags |= VNF_VUNCONF;
	wakeup(&vnd->sc_tab);
	while (vnd->sc_flags & VNF_KTHREAD)
		tsleep(&vnd->sc_kthread, PRIBIO, "vnthr", 0);

#ifdef VND_COMPRESSION
	/* free the compressed file buffers */
	if(vnd->sc_flags & VNF_COMP) {
		if(vnd->sc_comp_offsets) {
			free(vnd->sc_comp_offsets, M_DEVBUF);
			vnd->sc_comp_offsets = NULL;
		}
		if(vnd->sc_comp_buff) {
			free(vnd->sc_comp_buff, M_DEVBUF);
			vnd->sc_comp_buff = NULL;
		}
		if(vnd->sc_comp_decombuf) {
			free(vnd->sc_comp_decombuf, M_DEVBUF);
			vnd->sc_comp_decombuf = NULL;
		}
	}
#endif /* VND_COMPRESSION */
	vnd->sc_flags &=
	    ~(VNF_INITED | VNF_READONLY | VNF_VLABEL
	      | VNF_VUNCONF | VNF_COMP);
	if (vp == (struct vnode *)0)
		panic("vndclear: null vp");
	(void) vn_close(vp, fflags, vnd->sc_cred);
	kauth_cred_free(vnd->sc_cred);
	vnd->sc_vp = (struct vnode *)0;
	vnd->sc_cred = (kauth_cred_t)0;
	vnd->sc_size = 0;
}

static int
vndsize(dev_t dev)
{
	struct vnd_softc *sc;
	struct disklabel *lp;
	int part, unit, omask;
	int size;

	unit = vndunit(dev);
	sc = device_lookup_private(&vnd_cd, unit);
	if (sc == NULL)
		return -1;

	if ((sc->sc_flags & VNF_INITED) == 0)
		return (-1);

	part = DISKPART(dev);
	omask = sc->sc_dkdev.dk_openmask & (1 << part);
	lp = sc->sc_dkdev.dk_label;

	if (omask == 0 && vndopen(dev, 0, S_IFBLK, curlwp))	/* XXX */
		return (-1);

	if (lp->d_partitions[part].p_fstype != FS_SWAP)
		size = -1;
	else
		size = lp->d_partitions[part].p_size *
		    (lp->d_secsize / DEV_BSIZE);

	if (omask == 0 && vndclose(dev, 0, S_IFBLK, curlwp))	/* XXX */
		return (-1);

	return (size);
}

static int
vnddump(dev_t dev, daddr_t blkno, void *va,
    size_t size)
{

	/* Not implemented. */
	return ENXIO;
}

static void
vndgetdefaultlabel(struct vnd_softc *sc, struct disklabel *lp)
{
	struct vndgeom *vng = &sc->sc_geom;
	struct partition *pp;

	memset(lp, 0, sizeof(*lp));

	lp->d_secperunit = sc->sc_size / (vng->vng_secsize / DEV_BSIZE);
	lp->d_secsize = vng->vng_secsize;
	lp->d_nsectors = vng->vng_nsectors;
	lp->d_ntracks = vng->vng_ntracks;
	lp->d_ncylinders = vng->vng_ncylinders;
	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;

	strncpy(lp->d_typename, "vnd", sizeof(lp->d_typename));
	lp->d_type = DTYPE_VND;
	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
	lp->d_rpm = 3600;
	lp->d_interleave = 1;
	lp->d_flags = 0;

	pp = &lp->d_partitions[RAW_PART];
	pp->p_offset = 0;
	pp->p_size = lp->d_secperunit;
	pp->p_fstype = FS_UNUSED;
	lp->d_npartitions = RAW_PART + 1;

	lp->d_magic = DISKMAGIC;
	lp->d_magic2 = DISKMAGIC;
	lp->d_checksum = dkcksum(lp);
}

/*
 * Read the disklabel from a vnd.  If one is not present, create a fake one.
 */
static void
vndgetdisklabel(dev_t dev, struct vnd_softc *sc)
{
	const char *errstring;
	struct disklabel *lp = sc->sc_dkdev.dk_label;
	struct cpu_disklabel *clp = sc->sc_dkdev.dk_cpulabel;
	int i;

	memset(clp, 0, sizeof(*clp));

	vndgetdefaultlabel(sc, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(VNDLABELDEV(dev), vndstrategy, lp, clp);
	if (errstring) {
		/*
		 * Lack of disklabel is common, but we print the warning
		 * anyway, since it might contain other useful information.
		 */
		aprint_normal_dev(sc->sc_dev, "%s\n", errstring);

		/*
		 * For historical reasons, if there's no disklabel
		 * present, all partitions must be FS_BSDFFS and
		 * occupy the entire disk.
		 */
		for (i = 0; i < MAXPARTITIONS; i++) {
			/*
			 * Don't wipe out port specific hack (such as
			 * dos partition hack of i386 port).
			 */
			if (lp->d_partitions[i].p_size != 0)
				continue;

			lp->d_partitions[i].p_size = lp->d_secperunit;
			lp->d_partitions[i].p_offset = 0;
			lp->d_partitions[i].p_fstype = FS_BSDFFS;
		}

		strncpy(lp->d_packname, "default label",
		    sizeof(lp->d_packname));

		lp->d_npartitions = MAXPARTITIONS;
		lp->d_checksum = dkcksum(lp);
	}

	/* In-core label now valid. */
	sc->sc_flags |= VNF_VLABEL;
}

/*
 * Wait interruptibly for an exclusive lock.
 *
 * XXX
 * Several drivers do this; it should be abstracted and made MP-safe.
 */
static int
vndlock(struct vnd_softc *sc)
{
	int error;

	while ((sc->sc_flags & VNF_LOCKED) != 0) {
		sc->sc_flags |= VNF_WANTED;
		if ((error = tsleep(sc, PRIBIO | PCATCH, "vndlck", 0)) != 0)
			return (error);
	}
	sc->sc_flags |= VNF_LOCKED;
	return (0);
}

/*
 * Unlock and wake up any waiters.
 */
static void
vndunlock(struct vnd_softc *sc)
{

	sc->sc_flags &= ~VNF_LOCKED;
	if ((sc->sc_flags & VNF_WANTED) != 0) {
		sc->sc_flags &= ~VNF_WANTED;
		wakeup(sc);
	}
}

#ifdef VND_COMPRESSION
/* compressed file read */
static void
compstrategy(struct buf *bp, off_t bn)
{
	int error;
	int unit = vndunit(bp->b_dev);
	struct vnd_softc *vnd =
	    device_lookup_private(&vnd_cd, unit);
	u_int32_t comp_block;
	struct uio auio;
	char *addr;
	int s;

	/* set up constants for data move */
	auio.uio_rw = UIO_READ;
	UIO_SETUP_SYSSPACE(&auio);

	/* read, and transfer the data */
	addr = bp->b_data;
	bp->b_resid = bp->b_bcount;
	s = splbio();
	while (bp->b_resid > 0) {
		unsigned length;
		size_t length_in_buffer;
		u_int32_t offset_in_buffer;
		struct iovec aiov;

		/* calculate the compressed block number */
		comp_block = bn / (off_t)vnd->sc_comp_blksz;

		/* check for good block number */
		if (comp_block >= vnd->sc_comp_numoffs) {
			bp->b_error = EINVAL;
			splx(s);
			return;
		}

		/* read in the compressed block, if not in buffer */
		if (comp_block != vnd->sc_comp_buffblk) {
			length = vnd->sc_comp_offsets[comp_block + 1] -
			    vnd->sc_comp_offsets[comp_block];
			vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY);
			error = vn_rdwr(UIO_READ, vnd->sc_vp, vnd->sc_comp_buff,
			    length, vnd->sc_comp_offsets[comp_block],
			    UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, vnd->sc_cred,
			    NULL, NULL);
			if (error) {
				bp->b_error = error;
				VOP_UNLOCK(vnd->sc_vp, 0);
				splx(s);
				return;
			}
			/* uncompress the buffer */
			vnd->sc_comp_stream.next_in = vnd->sc_comp_buff;
			vnd->sc_comp_stream.avail_in = length;
			vnd->sc_comp_stream.next_out = vnd->sc_comp_decombuf;
			vnd->sc_comp_stream.avail_out = vnd->sc_comp_blksz;
			inflateReset(&vnd->sc_comp_stream);
			error = inflate(&vnd->sc_comp_stream, Z_FINISH);
			if (error != Z_STREAM_END) {
				if (vnd->sc_comp_stream.msg)
					aprint_normal_dev(vnd->sc_dev,
					    "compressed file, %s\n",
					    vnd->sc_comp_stream.msg);
				bp->b_error = EBADMSG;
				VOP_UNLOCK(vnd->sc_vp, 0);
				splx(s);
				return;
			}
			vnd->sc_comp_buffblk = comp_block;
			VOP_UNLOCK(vnd->sc_vp, 0);
		}

		/* transfer the usable uncompressed data */
		offset_in_buffer = bn % (off_t)vnd->sc_comp_blksz;
		length_in_buffer = vnd->sc_comp_blksz - offset_in_buffer;
		if (length_in_buffer > bp->b_resid)
			length_in_buffer = bp->b_resid;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		aiov.iov_base = addr;
		aiov.iov_len = length_in_buffer;
		auio.uio_resid = aiov.iov_len;
		auio.uio_offset = 0;
		error = uiomove(vnd->sc_comp_decombuf + offset_in_buffer,
		    length_in_buffer, &auio);
		if (error) {
			bp->b_error = error;
			splx(s);
			return;
		}

		bn += length_in_buffer;
		addr += length_in_buffer;
		bp->b_resid -= length_in_buffer;
	}
	splx(s);
}

/* compression memory allocation routines */
static void *
vnd_alloc(void *aux, u_int items, u_int siz)
{
	return malloc(items * siz, M_TEMP, M_NOWAIT);
}

static void
vnd_free(void *aux, void *ptr)
{
	free(ptr, M_TEMP);
}
#endif /* VND_COMPRESSION */

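/*
 * Publish the fictitious geometry as a proplib "disk-info" dictionary
 * attached to the device and to the disk(9) structure.
 */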
static void
vnd_set_properties(struct vnd_softc *vnd)
{
	prop_dictionary_t disk_info, odisk_info, geom;

	disk_info = prop_dictionary_create();

	geom = prop_dictionary_create();

	prop_dictionary_set_uint64(geom, "sectors-per-unit",
	    vnd->sc_geom.vng_nsectors * vnd->sc_geom.vng_ntracks *
	    vnd->sc_geom.vng_ncylinders);

	prop_dictionary_set_uint32(geom, "sector-size",
	    vnd->sc_geom.vng_secsize);

	prop_dictionary_set_uint16(geom, "sectors-per-track",
	    vnd->sc_geom.vng_nsectors);

	prop_dictionary_set_uint16(geom, "tracks-per-cylinder",
	    vnd->sc_geom.vng_ntracks);

	prop_dictionary_set_uint64(geom, "cylinders-per-unit",
	    vnd->sc_geom.vng_ncylinders);

	prop_dictionary_set(disk_info, "geometry", geom);
	prop_object_release(geom);

	prop_dictionary_set(device_properties(vnd->sc_dev),
	    "disk-info", disk_info);

	/*
	 * Don't release disk_info here; we keep a reference to it.
	 * disk_detach() will release it when we go away.
	 */

	odisk_info = vnd->sc_dkdev.dk_info;
	vnd->sc_dkdev.dk_info = disk_info;
	if (odisk_info)
		prop_object_release(odisk_info);
}