1 /* $NetBSD: vnd.c,v 1.173 2007/12/12 03:54:27 smb Exp $ */ 2 3 /*- 4 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the NetBSD 21 * Foundation, Inc. and its contributors. 22 * 4. Neither the name of The NetBSD Foundation nor the names of its 23 * contributors may be used to endorse or promote products derived 24 * from this software without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 36 * POSSIBILITY OF SUCH DAMAGE. 37 */ 38 39 /* 40 * Copyright (c) 1990, 1993 41 * The Regents of the University of California. All rights reserved. 42 * 43 * This code is derived from software contributed to Berkeley by 44 * the Systems Programming Group of the University of Utah Computer 45 * Science Department. 46 * 47 * Redistribution and use in source and binary forms, with or without 48 * modification, are permitted provided that the following conditions 49 * are met: 50 * 1. Redistributions of source code must retain the above copyright 51 * notice, this list of conditions and the following disclaimer. 52 * 2. Redistributions in binary form must reproduce the above copyright 53 * notice, this list of conditions and the following disclaimer in the 54 * documentation and/or other materials provided with the distribution. 55 * 3. Neither the name of the University nor the names of its contributors 56 * may be used to endorse or promote products derived from this software 57 * without specific prior written permission. 58 * 59 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 60 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 61 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 62 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 63 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 64 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 65 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 66 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 67 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 68 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 69 * SUCH DAMAGE. 70 * 71 * from: Utah $Hdr: vn.c 1.13 94/04/02$ 72 * 73 * @(#)vn.c 8.9 (Berkeley) 5/14/95 74 */ 75 76 /* 77 * Copyright (c) 1988 University of Utah. 78 * 79 * This code is derived from software contributed to Berkeley by 80 * the Systems Programming Group of the University of Utah Computer 81 * Science Department. 82 * 83 * Redistribution and use in source and binary forms, with or without 84 * modification, are permitted provided that the following conditions 85 * are met: 86 * 1. Redistributions of source code must retain the above copyright 87 * notice, this list of conditions and the following disclaimer. 88 * 2. Redistributions in binary form must reproduce the above copyright 89 * notice, this list of conditions and the following disclaimer in the 90 * documentation and/or other materials provided with the distribution. 91 * 3. All advertising materials mentioning features or use of this software 92 * must display the following acknowledgement: 93 * This product includes software developed by the University of 94 * California, Berkeley and its contributors. 95 * 4. Neither the name of the University nor the names of its contributors 96 * may be used to endorse or promote products derived from this software 97 * without specific prior written permission. 98 * 99 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 100 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 101 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 102 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 103 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 104 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 105 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 106 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 107 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 108 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 109 * SUCH DAMAGE. 110 * 111 * from: Utah $Hdr: vn.c 1.13 94/04/02$ 112 * 113 * @(#)vn.c 8.9 (Berkeley) 5/14/95 114 */ 115 116 /* 117 * Vnode disk driver. 118 * 119 * Block/character interface to a vnode. Allows one to treat a file 120 * as a disk (e.g. build a filesystem in it, mount it, etc.). 121 * 122 * NOTE 1: If the vnode supports the VOP_BMAP and VOP_STRATEGY operations, 123 * this uses them to avoid distorting the local buffer cache. If those 124 * block-level operations are not available, this falls back to the regular 125 * read and write calls. Using these may distort the cache in some cases 126 * but better have the driver working than preventing it to work on file 127 * systems where the block-level operations are not implemented for 128 * whatever reason. 129 * 130 * NOTE 2: There is a security issue involved with this driver. 131 * Once mounted all access to the contents of the "mapped" file via 132 * the special file is controlled by the permissions on the special 133 * file, the protection of the mapped file is ignored (effectively, 134 * by using root credentials in all transactions). 135 * 136 * NOTE 3: Doesn't interact with leases, should it? 137 */ 138 139 #include <sys/cdefs.h> 140 __KERNEL_RCSID(0, "$NetBSD: vnd.c,v 1.173 2007/12/12 03:54:27 smb Exp $"); 141 142 #if defined(_KERNEL_OPT) 143 #include "fs_nfs.h" 144 #include "opt_vnd.h" 145 #endif 146 147 #include <sys/param.h> 148 #include <sys/systm.h> 149 #include <sys/namei.h> 150 #include <sys/proc.h> 151 #include <sys/kthread.h> 152 #include <sys/errno.h> 153 #include <sys/buf.h> 154 #include <sys/bufq.h> 155 #include <sys/malloc.h> 156 #include <sys/ioctl.h> 157 #include <sys/disklabel.h> 158 #include <sys/device.h> 159 #include <sys/disk.h> 160 #include <sys/stat.h> 161 #include <sys/mount.h> 162 #include <sys/vnode.h> 163 #include <sys/file.h> 164 #include <sys/uio.h> 165 #include <sys/conf.h> 166 #include <sys/kauth.h> 167 168 #include <net/zlib.h> 169 170 #include <miscfs/genfs/genfs.h> 171 #include <miscfs/specfs/specdev.h> 172 173 #include <dev/vndvar.h> 174 175 #if defined(VNDDEBUG) && !defined(DEBUG) 176 #define DEBUG 177 #endif 178 179 #ifdef DEBUG 180 int dovndcluster = 1; 181 #define VDB_FOLLOW 0x01 182 #define VDB_INIT 0x02 183 #define VDB_IO 0x04 184 #define VDB_LABEL 0x08 185 int vnddebug = 0x00; 186 #endif 187 188 #define vndunit(x) DISKUNIT(x) 189 190 struct vndxfer { 191 struct buf vx_buf; 192 struct vnd_softc *vx_vnd; 193 }; 194 #define VND_BUFTOXFER(bp) ((struct vndxfer *)(void *)bp) 195 196 #define VND_GETXFER(vnd) pool_get(&(vnd)->sc_vxpool, PR_WAITOK) 197 #define VND_PUTXFER(vnd, vx) pool_put(&(vnd)->sc_vxpool, (vx)) 198 199 #define VNDLABELDEV(dev) \ 200 (MAKEDISKDEV(major((dev)), vndunit((dev)), RAW_PART)) 201 202 /* called by main() at boot time (XXX: and the LKM driver) */ 203 void vndattach(int); 204 205 static void vndclear(struct vnd_softc *, int); 206 static int vndsetcred(struct vnd_softc *, kauth_cred_t); 207 static void vndthrottle(struct vnd_softc *, struct vnode *); 208 static void vndiodone(struct buf *); 209 #if 0 210 static void vndshutdown(void); 211 #endif 212 213 static void vndgetdefaultlabel(struct vnd_softc *, struct disklabel *); 214 static void vndgetdisklabel(dev_t, struct vnd_softc *); 215 216 static int vndlock(struct vnd_softc *); 217 static void vndunlock(struct vnd_softc *); 218 #ifdef VND_COMPRESSION 219 static void compstrategy(struct buf *, off_t); 220 static void *vnd_alloc(void *, u_int, u_int); 221 static void vnd_free(void *, void *); 222 #endif /* VND_COMPRESSION */ 223 224 static void vndthread(void *); 225 static bool vnode_has_op(const struct vnode *, int); 226 static void handle_with_rdwr(struct vnd_softc *, const struct buf *, 227 struct buf *); 228 static void handle_with_strategy(struct vnd_softc *, const struct buf *, 229 struct buf *); 230 231 static dev_type_open(vndopen); 232 static dev_type_close(vndclose); 233 static dev_type_read(vndread); 234 static dev_type_write(vndwrite); 235 static dev_type_ioctl(vndioctl); 236 static dev_type_strategy(vndstrategy); 237 static dev_type_dump(vnddump); 238 static dev_type_size(vndsize); 239 240 const struct bdevsw vnd_bdevsw = { 241 vndopen, vndclose, vndstrategy, vndioctl, vnddump, vndsize, D_DISK 242 }; 243 244 const struct cdevsw vnd_cdevsw = { 245 vndopen, vndclose, vndread, vndwrite, vndioctl, 246 nostop, notty, nopoll, nommap, nokqfilter, D_DISK 247 }; 248 249 static int vnd_match(struct device *, struct cfdata *, void *); 250 static void vnd_attach(struct device *, struct device *, void *); 251 static int vnd_detach(struct device *, int); 252 253 CFATTACH_DECL(vnd, sizeof(struct vnd_softc), 254 vnd_match, vnd_attach, vnd_detach, NULL); 255 extern struct cfdriver vnd_cd; 256 257 static struct vnd_softc *vnd_spawn(int); 258 int vnd_destroy(struct device *); 259 260 void 261 vndattach(int num) 262 { 263 int error; 264 265 error = config_cfattach_attach(vnd_cd.cd_name, &vnd_ca); 266 if (error) 267 aprint_error("%s: unable to register cfattach\n", 268 vnd_cd.cd_name); 269 } 270 271 static int 272 vnd_match(struct device *self, struct cfdata *cfdata, 273 void *aux) 274 { 275 return 1; 276 } 277 278 static void 279 vnd_attach(struct device *parent, struct device *self, 280 void *aux) 281 { 282 struct vnd_softc *sc = (struct vnd_softc *)self; 283 284 sc->sc_comp_offsets = NULL; 285 sc->sc_comp_buff = NULL; 286 sc->sc_comp_decombuf = NULL; 287 bufq_alloc(&sc->sc_tab, "disksort", BUFQ_SORT_RAWBLOCK); 288 disk_init(&sc->sc_dkdev, self->dv_xname, NULL); 289 if (!pmf_device_register(self, NULL, NULL)) 290 aprint_error_dev(self, "couldn't establish power handler\n"); 291 } 292 293 static int 294 vnd_detach(struct device *self, int flags) 295 { 296 struct vnd_softc *sc = (struct vnd_softc *)self; 297 if (sc->sc_flags & VNF_INITED) 298 return EBUSY; 299 300 pmf_device_deregister(self); 301 bufq_free(sc->sc_tab); 302 disk_destroy(&sc->sc_dkdev); 303 304 return 0; 305 } 306 307 static struct vnd_softc * 308 vnd_spawn(int unit) 309 { 310 struct cfdata *cf; 311 312 cf = malloc(sizeof(*cf), M_DEVBUF, M_WAITOK); 313 cf->cf_name = vnd_cd.cd_name; 314 cf->cf_atname = vnd_cd.cd_name; 315 cf->cf_unit = unit; 316 cf->cf_fstate = FSTATE_STAR; 317 318 return (struct vnd_softc *)config_attach_pseudo(cf); 319 } 320 321 int 322 vnd_destroy(struct device *dev) 323 { 324 int error; 325 struct cfdata *cf; 326 327 cf = device_cfdata(dev); 328 error = config_detach(dev, DETACH_QUIET); 329 if (error) 330 return error; 331 free(cf, M_DEVBUF); 332 return 0; 333 } 334 335 static int 336 vndopen(dev_t dev, int flags, int mode, struct lwp *l) 337 { 338 int unit = vndunit(dev); 339 struct vnd_softc *sc; 340 int error = 0, part, pmask; 341 struct disklabel *lp; 342 343 #ifdef DEBUG 344 if (vnddebug & VDB_FOLLOW) 345 printf("vndopen(0x%x, 0x%x, 0x%x, %p)\n", dev, flags, mode, l); 346 #endif 347 sc = device_lookup(&vnd_cd, unit); 348 if (sc == NULL) { 349 sc = vnd_spawn(unit); 350 if (sc == NULL) 351 return ENOMEM; 352 } 353 354 if ((error = vndlock(sc)) != 0) 355 return (error); 356 357 lp = sc->sc_dkdev.dk_label; 358 359 part = DISKPART(dev); 360 pmask = (1 << part); 361 362 /* 363 * If we're initialized, check to see if there are any other 364 * open partitions. If not, then it's safe to update the 365 * in-core disklabel. Only read the disklabel if it is 366 * not already valid. 367 */ 368 if ((sc->sc_flags & (VNF_INITED|VNF_VLABEL)) == VNF_INITED && 369 sc->sc_dkdev.dk_openmask == 0) 370 vndgetdisklabel(dev, sc); 371 372 /* Check that the partitions exists. */ 373 if (part != RAW_PART) { 374 if (((sc->sc_flags & VNF_INITED) == 0) || 375 ((part >= lp->d_npartitions) || 376 (lp->d_partitions[part].p_fstype == FS_UNUSED))) { 377 error = ENXIO; 378 goto done; 379 } 380 } 381 382 /* Prevent our unit from being unconfigured while open. */ 383 switch (mode) { 384 case S_IFCHR: 385 sc->sc_dkdev.dk_copenmask |= pmask; 386 break; 387 388 case S_IFBLK: 389 sc->sc_dkdev.dk_bopenmask |= pmask; 390 break; 391 } 392 sc->sc_dkdev.dk_openmask = 393 sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask; 394 395 done: 396 vndunlock(sc); 397 return (error); 398 } 399 400 static int 401 vndclose(dev_t dev, int flags, int mode, struct lwp *l) 402 { 403 int unit = vndunit(dev); 404 struct vnd_softc *sc; 405 int error = 0, part; 406 407 #ifdef DEBUG 408 if (vnddebug & VDB_FOLLOW) 409 printf("vndclose(0x%x, 0x%x, 0x%x, %p)\n", dev, flags, mode, l); 410 #endif 411 sc = device_lookup(&vnd_cd, unit); 412 if (sc == NULL) 413 return ENXIO; 414 415 if ((error = vndlock(sc)) != 0) 416 return (error); 417 418 part = DISKPART(dev); 419 420 /* ...that much closer to allowing unconfiguration... */ 421 switch (mode) { 422 case S_IFCHR: 423 sc->sc_dkdev.dk_copenmask &= ~(1 << part); 424 break; 425 426 case S_IFBLK: 427 sc->sc_dkdev.dk_bopenmask &= ~(1 << part); 428 break; 429 } 430 sc->sc_dkdev.dk_openmask = 431 sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask; 432 433 vndunlock(sc); 434 435 if ((sc->sc_flags & VNF_INITED) == 0) { 436 if ((error = vnd_destroy((struct device *)sc)) != 0) { 437 aprint_error("%s: unable to detach instance\n", 438 sc->sc_dev.dv_xname); 439 return error; 440 } 441 } 442 443 return (0); 444 } 445 446 /* 447 * Queue the request, and wakeup the kernel thread to handle it. 448 */ 449 static void 450 vndstrategy(struct buf *bp) 451 { 452 int unit = vndunit(bp->b_dev); 453 struct vnd_softc *vnd = 454 (struct vnd_softc *)device_lookup(&vnd_cd, unit); 455 struct disklabel *lp = vnd->sc_dkdev.dk_label; 456 daddr_t blkno; 457 int s = splbio(); 458 459 if ((vnd->sc_flags & VNF_INITED) == 0) { 460 bp->b_error = ENXIO; 461 goto done; 462 } 463 464 /* 465 * The transfer must be a whole number of blocks. 466 */ 467 if ((bp->b_bcount % lp->d_secsize) != 0) { 468 bp->b_error = EINVAL; 469 goto done; 470 } 471 472 /* 473 * check if we're read-only. 474 */ 475 if ((vnd->sc_flags & VNF_READONLY) && !(bp->b_flags & B_READ)) { 476 bp->b_error = EACCES; 477 goto done; 478 } 479 480 /* If it's a nil transfer, wake up the top half now. */ 481 if (bp->b_bcount == 0) { 482 goto done; 483 } 484 485 /* 486 * Do bounds checking and adjust transfer. If there's an error, 487 * the bounds check will flag that for us. 488 */ 489 if (DISKPART(bp->b_dev) == RAW_PART) { 490 if (bounds_check_with_mediasize(bp, DEV_BSIZE, 491 vnd->sc_size) <= 0) 492 goto done; 493 } else { 494 if (bounds_check_with_label(&vnd->sc_dkdev, 495 bp, vnd->sc_flags & (VNF_WLABEL|VNF_LABELLING)) <= 0) 496 goto done; 497 } 498 499 /* 500 * Put the block number in terms of the logical blocksize 501 * of the "device". 502 */ 503 504 blkno = bp->b_blkno / (lp->d_secsize / DEV_BSIZE); 505 506 /* 507 * Translate the partition-relative block number to an absolute. 508 */ 509 if (DISKPART(bp->b_dev) != RAW_PART) { 510 struct partition *pp; 511 512 pp = &vnd->sc_dkdev.dk_label->d_partitions[ 513 DISKPART(bp->b_dev)]; 514 blkno += pp->p_offset; 515 } 516 bp->b_rawblkno = blkno; 517 518 #ifdef DEBUG 519 if (vnddebug & VDB_FOLLOW) 520 printf("vndstrategy(%p): unit %d\n", bp, unit); 521 #endif 522 BUFQ_PUT(vnd->sc_tab, bp); 523 wakeup(&vnd->sc_tab); 524 splx(s); 525 return; 526 527 done: 528 bp->b_resid = bp->b_bcount; 529 biodone(bp); 530 splx(s); 531 } 532 533 static void 534 vndthread(void *arg) 535 { 536 struct vnd_softc *vnd = arg; 537 bool usestrategy; 538 int s; 539 540 /* Determine whether we can use VOP_BMAP and VOP_STRATEGY to 541 * directly access the backing vnode. If we can, use these two 542 * operations to avoid messing with the local buffer cache. 543 * Otherwise fall back to regular VOP_READ/VOP_WRITE operations 544 * which are guaranteed to work with any file system. */ 545 usestrategy = vnode_has_op(vnd->sc_vp, VOFFSET(vop_bmap)) && 546 vnode_has_op(vnd->sc_vp, VOFFSET(vop_strategy)); 547 548 #ifdef DEBUG 549 if (vnddebug & VDB_INIT) 550 printf("vndthread: vp %p, %s\n", vnd->sc_vp, 551 usestrategy ? 552 "using bmap/strategy operations" : 553 "using read/write operations"); 554 #endif 555 556 s = splbio(); 557 vnd->sc_flags |= VNF_KTHREAD; 558 wakeup(&vnd->sc_kthread); 559 560 /* 561 * Dequeue requests and serve them depending on the available 562 * vnode operations. 563 */ 564 while ((vnd->sc_flags & VNF_VUNCONF) == 0) { 565 struct vndxfer *vnx; 566 int flags; 567 struct buf *obp; 568 struct buf *bp; 569 570 obp = BUFQ_GET(vnd->sc_tab); 571 if (obp == NULL) { 572 tsleep(&vnd->sc_tab, PRIBIO, "vndbp", 0); 573 continue; 574 }; 575 splx(s); 576 flags = obp->b_flags; 577 #ifdef DEBUG 578 if (vnddebug & VDB_FOLLOW) 579 printf("vndthread(%p\n", obp); 580 #endif 581 582 if (vnd->sc_vp->v_mount == NULL) { 583 obp->b_error = ENXIO; 584 goto done; 585 } 586 #ifdef VND_COMPRESSION 587 /* handle a compressed read */ 588 if ((flags & B_READ) != 0 && (vnd->sc_flags & VNF_COMP)) { 589 off_t bn; 590 591 /* Convert to a byte offset within the file. */ 592 bn = obp->b_rawblkno * 593 vnd->sc_dkdev.dk_label->d_secsize; 594 595 compstrategy(obp, bn); 596 goto done; 597 } 598 #endif /* VND_COMPRESSION */ 599 600 /* 601 * Allocate a header for this transfer and link it to the 602 * buffer 603 */ 604 s = splbio(); 605 vnx = VND_GETXFER(vnd); 606 splx(s); 607 vnx->vx_vnd = vnd; 608 609 s = splbio(); 610 while (vnd->sc_active >= vnd->sc_maxactive) { 611 tsleep(&vnd->sc_tab, PRIBIO, "vndac", 0); 612 } 613 vnd->sc_active++; 614 splx(s); 615 616 /* Instrumentation. */ 617 disk_busy(&vnd->sc_dkdev); 618 619 bp = &vnx->vx_buf; 620 BUF_INIT(bp); 621 bp->b_flags = (obp->b_flags & B_READ) | B_CALL; 622 bp->b_iodone = vndiodone; 623 bp->b_private = obp; 624 bp->b_vp = vnd->sc_vp; 625 bp->b_data = obp->b_data; 626 bp->b_bcount = obp->b_bcount; 627 BIO_COPYPRIO(bp, obp); 628 629 /* Handle the request using the appropriate operations. */ 630 if (usestrategy) 631 handle_with_strategy(vnd, obp, bp); 632 else 633 handle_with_rdwr(vnd, obp, bp); 634 635 s = splbio(); 636 continue; 637 638 done: 639 biodone(obp); 640 s = splbio(); 641 } 642 643 vnd->sc_flags &= (~VNF_KTHREAD | VNF_VUNCONF); 644 wakeup(&vnd->sc_kthread); 645 splx(s); 646 kthread_exit(0); 647 } 648 649 /* 650 * Checks if the given vnode supports the requested operation. 651 * The operation is specified the offset returned by VOFFSET. 652 * 653 * XXX The test below used to determine this is quite fragile 654 * because it relies on the file system to use genfs to specify 655 * unimplemented operations. There might be another way to do 656 * it more cleanly. 657 */ 658 static bool 659 vnode_has_op(const struct vnode *vp, int opoffset) 660 { 661 int (*defaultp)(void *); 662 int (*opp)(void *); 663 664 defaultp = vp->v_op[VOFFSET(vop_default)]; 665 opp = vp->v_op[opoffset]; 666 667 return opp != defaultp && opp != genfs_eopnotsupp && 668 opp != genfs_badop && opp != genfs_nullop; 669 } 670 671 /* 672 * Handes the read/write request given in 'bp' using the vnode's VOP_READ 673 * and VOP_WRITE operations. 674 * 675 * 'obp' is a pointer to the original request fed to the vnd device. 676 */ 677 static void 678 handle_with_rdwr(struct vnd_softc *vnd, const struct buf *obp, struct buf *bp) 679 { 680 bool doread; 681 off_t offset; 682 size_t resid; 683 struct vnode *vp; 684 685 doread = bp->b_flags & B_READ; 686 offset = obp->b_rawblkno * vnd->sc_dkdev.dk_label->d_secsize; 687 vp = vnd->sc_vp; 688 689 #if defined(DEBUG) 690 if (vnddebug & VDB_IO) 691 printf("vnd (rdwr): vp %p, %s, rawblkno 0x%" PRIx64 692 ", secsize %d, offset %" PRIu64 693 ", bcount %d\n", 694 vp, doread ? "read" : "write", obp->b_rawblkno, 695 vnd->sc_dkdev.dk_label->d_secsize, offset, 696 bp->b_bcount); 697 #endif 698 699 /* Issue the read or write operation. */ 700 bp->b_error = 701 vn_rdwr(doread ? UIO_READ : UIO_WRITE, 702 vp, bp->b_data, bp->b_bcount, offset, 703 UIO_SYSSPACE, 0, vnd->sc_cred, &resid, NULL); 704 bp->b_resid = resid; 705 706 /* We need to increase the number of outputs on the vnode if 707 * there was any write to it. */ 708 if (!doread) 709 V_INCR_NUMOUTPUT(vp); 710 711 biodone(bp); 712 } 713 714 /* 715 * Handes the read/write request given in 'bp' using the vnode's VOP_BMAP 716 * and VOP_STRATEGY operations. 717 * 718 * 'obp' is a pointer to the original request fed to the vnd device. 719 */ 720 static void 721 handle_with_strategy(struct vnd_softc *vnd, const struct buf *obp, 722 struct buf *bp) 723 { 724 int bsize, error, flags, skipped; 725 size_t resid, sz; 726 off_t bn, offset; 727 728 flags = obp->b_flags; 729 730 if (!(flags & B_READ)) { 731 int s; 732 733 s = splbio(); 734 V_INCR_NUMOUTPUT(bp->b_vp); 735 splx(s); 736 } 737 738 /* convert to a byte offset within the file. */ 739 bn = obp->b_rawblkno * vnd->sc_dkdev.dk_label->d_secsize; 740 741 bsize = vnd->sc_vp->v_mount->mnt_stat.f_iosize; 742 skipped = 0; 743 744 /* 745 * Break the request into bsize pieces and feed them 746 * sequentially using VOP_BMAP/VOP_STRATEGY. 747 * We do it this way to keep from flooding NFS servers if we 748 * are connected to an NFS file. This places the burden on 749 * the client rather than the server. 750 */ 751 error = 0; 752 bp->b_resid = bp->b_bcount; 753 for (offset = 0, resid = bp->b_resid; resid; 754 resid -= sz, offset += sz) { 755 struct buf *nbp; 756 struct vnode *vp; 757 daddr_t nbn; 758 int off, nra; 759 760 nra = 0; 761 vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY | LK_CANRECURSE); 762 error = VOP_BMAP(vnd->sc_vp, bn / bsize, &vp, &nbn, &nra); 763 VOP_UNLOCK(vnd->sc_vp, 0); 764 765 if (error == 0 && (long)nbn == -1) 766 error = EIO; 767 768 /* 769 * If there was an error or a hole in the file...punt. 770 * Note that we may have to wait for any operations 771 * that we have already fired off before releasing 772 * the buffer. 773 * 774 * XXX we could deal with holes here but it would be 775 * a hassle (in the write case). 776 */ 777 if (error) { 778 skipped += resid; 779 break; 780 } 781 782 #ifdef DEBUG 783 if (!dovndcluster) 784 nra = 0; 785 #endif 786 787 off = bn % bsize; 788 sz = MIN(((off_t)1 + nra) * bsize - off, resid); 789 #ifdef DEBUG 790 if (vnddebug & VDB_IO) 791 printf("vndstrategy: vp %p/%p bn 0x%qx/0x%" PRIx64 792 " sz 0x%zx\n", 793 vnd->sc_vp, vp, (long long)bn, nbn, sz); 794 #endif 795 796 nbp = getiobuf(); 797 nestiobuf_setup(bp, nbp, offset, sz); 798 nbp->b_blkno = nbn + btodb(off); 799 800 #if 0 /* XXX #ifdef DEBUG */ 801 if (vnddebug & VDB_IO) 802 printf("vndstart(%ld): bp %p vp %p blkno " 803 "0x%" PRIx64 " flags %x addr %p cnt 0x%x\n", 804 (long) (vnd-vnd_softc), &nbp->vb_buf, 805 nbp->vb_buf.b_vp, nbp->vb_buf.b_blkno, 806 nbp->vb_buf.b_flags, nbp->vb_buf.b_data, 807 nbp->vb_buf.b_bcount); 808 #endif 809 VOP_STRATEGY(vp, nbp); 810 bn += sz; 811 } 812 nestiobuf_done(bp, skipped, error); 813 } 814 815 static void 816 vndiodone(struct buf *bp) 817 { 818 struct vndxfer *vnx = VND_BUFTOXFER(bp); 819 struct vnd_softc *vnd = vnx->vx_vnd; 820 struct buf *obp = bp->b_private; 821 822 KASSERT(&vnx->vx_buf == bp); 823 KASSERT(vnd->sc_active > 0); 824 #ifdef DEBUG 825 if (vnddebug & VDB_IO) { 826 printf("vndiodone1: bp %p iodone: error %d\n", 827 bp, bp->b_error); 828 } 829 #endif 830 disk_unbusy(&vnd->sc_dkdev, bp->b_bcount - bp->b_resid, 831 (bp->b_flags & B_READ)); 832 vnd->sc_active--; 833 if (vnd->sc_active == 0) { 834 wakeup(&vnd->sc_tab); 835 } 836 obp->b_error = bp->b_error; 837 obp->b_resid = bp->b_resid; 838 VND_PUTXFER(vnd, vnx); 839 biodone(obp); 840 } 841 842 /* ARGSUSED */ 843 static int 844 vndread(dev_t dev, struct uio *uio, int flags) 845 { 846 int unit = vndunit(dev); 847 struct vnd_softc *sc; 848 849 #ifdef DEBUG 850 if (vnddebug & VDB_FOLLOW) 851 printf("vndread(0x%x, %p)\n", dev, uio); 852 #endif 853 854 sc = device_lookup(&vnd_cd, unit); 855 if (sc == NULL) 856 return ENXIO; 857 858 if ((sc->sc_flags & VNF_INITED) == 0) 859 return (ENXIO); 860 861 return (physio(vndstrategy, NULL, dev, B_READ, minphys, uio)); 862 } 863 864 /* ARGSUSED */ 865 static int 866 vndwrite(dev_t dev, struct uio *uio, int flags) 867 { 868 int unit = vndunit(dev); 869 struct vnd_softc *sc; 870 871 #ifdef DEBUG 872 if (vnddebug & VDB_FOLLOW) 873 printf("vndwrite(0x%x, %p)\n", dev, uio); 874 #endif 875 876 sc = device_lookup(&vnd_cd, unit); 877 if (sc == NULL) 878 return ENXIO; 879 880 if ((sc->sc_flags & VNF_INITED) == 0) 881 return (ENXIO); 882 883 return (physio(vndstrategy, NULL, dev, B_WRITE, minphys, uio)); 884 } 885 886 static int 887 vnd_cget(struct lwp *l, int unit, int *un, struct vattr *va) 888 { 889 struct vnd_softc *vnd; 890 891 if (*un == -1) 892 *un = unit; 893 if (*un < 0) 894 return EINVAL; 895 896 vnd = device_lookup(&vnd_cd, *un); 897 if (vnd == NULL) 898 return (*un >= vnd_cd.cd_ndevs) ? ENXIO : -1; 899 900 if ((vnd->sc_flags & VNF_INITED) == 0) 901 return -1; 902 903 return VOP_GETATTR(vnd->sc_vp, va, l->l_cred); 904 } 905 906 /* ARGSUSED */ 907 static int 908 vndioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l) 909 { 910 int unit = vndunit(dev); 911 struct vnd_softc *vnd; 912 struct vnd_ioctl *vio; 913 struct vattr vattr; 914 struct nameidata nd; 915 int error, part, pmask; 916 size_t geomsize; 917 int fflags; 918 #ifdef __HAVE_OLD_DISKLABEL 919 struct disklabel newlabel; 920 #endif 921 922 #ifdef DEBUG 923 if (vnddebug & VDB_FOLLOW) 924 printf("vndioctl(0x%x, 0x%lx, %p, 0x%x, %p): unit %d\n", 925 dev, cmd, data, flag, l->l_proc, unit); 926 #endif 927 vnd = device_lookup(&vnd_cd, unit); 928 if (vnd == NULL && 929 #ifdef COMPAT_30 930 cmd != VNDIOOCGET && 931 #endif 932 cmd != VNDIOCGET) 933 return ENXIO; 934 vio = (struct vnd_ioctl *)data; 935 936 /* Must be open for writes for these commands... */ 937 switch (cmd) { 938 case VNDIOCSET: 939 case VNDIOCCLR: 940 case DIOCSDINFO: 941 case DIOCWDINFO: 942 #ifdef __HAVE_OLD_DISKLABEL 943 case ODIOCSDINFO: 944 case ODIOCWDINFO: 945 #endif 946 case DIOCKLABEL: 947 case DIOCWLABEL: 948 if ((flag & FWRITE) == 0) 949 return (EBADF); 950 } 951 952 /* Must be initialized for these... */ 953 switch (cmd) { 954 case VNDIOCCLR: 955 case DIOCGDINFO: 956 case DIOCSDINFO: 957 case DIOCWDINFO: 958 case DIOCGPART: 959 case DIOCKLABEL: 960 case DIOCWLABEL: 961 case DIOCGDEFLABEL: 962 #ifdef __HAVE_OLD_DISKLABEL 963 case ODIOCGDINFO: 964 case ODIOCSDINFO: 965 case ODIOCWDINFO: 966 case ODIOCGDEFLABEL: 967 #endif 968 if ((vnd->sc_flags & VNF_INITED) == 0) 969 return (ENXIO); 970 } 971 972 switch (cmd) { 973 case VNDIOCSET: 974 if (vnd->sc_flags & VNF_INITED) 975 return (EBUSY); 976 977 if ((error = vndlock(vnd)) != 0) 978 return (error); 979 980 fflags = FREAD; 981 if ((vio->vnd_flags & VNDIOF_READONLY) == 0) 982 fflags |= FWRITE; 983 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, vio->vnd_file); 984 if ((error = vn_open(&nd, fflags, 0)) != 0) 985 goto unlock_and_exit; 986 KASSERT(l); 987 error = VOP_GETATTR(nd.ni_vp, &vattr, l->l_cred); 988 if (!error && nd.ni_vp->v_type != VREG) 989 error = EOPNOTSUPP; 990 if (error) { 991 VOP_UNLOCK(nd.ni_vp, 0); 992 goto close_and_exit; 993 } 994 995 /* If using a compressed file, initialize its info */ 996 /* (or abort with an error if kernel has no compression) */ 997 if (vio->vnd_flags & VNF_COMP) { 998 #ifdef VND_COMPRESSION 999 struct vnd_comp_header *ch; 1000 int i; 1001 u_int32_t comp_size; 1002 u_int32_t comp_maxsize; 1003 1004 /* allocate space for compresed file header */ 1005 ch = malloc(sizeof(struct vnd_comp_header), 1006 M_TEMP, M_WAITOK); 1007 1008 /* read compressed file header */ 1009 error = vn_rdwr(UIO_READ, nd.ni_vp, (void *)ch, 1010 sizeof(struct vnd_comp_header), 0, UIO_SYSSPACE, 1011 IO_UNIT|IO_NODELOCKED, l->l_cred, NULL, NULL); 1012 if(error) { 1013 free(ch, M_TEMP); 1014 VOP_UNLOCK(nd.ni_vp, 0); 1015 goto close_and_exit; 1016 } 1017 1018 /* save some header info */ 1019 vnd->sc_comp_blksz = ntohl(ch->block_size); 1020 /* note last offset is the file byte size */ 1021 vnd->sc_comp_numoffs = ntohl(ch->num_blocks)+1; 1022 free(ch, M_TEMP); 1023 if (vnd->sc_comp_blksz == 0 || 1024 vnd->sc_comp_blksz % DEV_BSIZE !=0) { 1025 VOP_UNLOCK(nd.ni_vp, 0); 1026 error = EINVAL; 1027 goto close_and_exit; 1028 } 1029 if(sizeof(struct vnd_comp_header) + 1030 sizeof(u_int64_t) * vnd->sc_comp_numoffs > 1031 vattr.va_size) { 1032 VOP_UNLOCK(nd.ni_vp, 0); 1033 error = EINVAL; 1034 goto close_and_exit; 1035 } 1036 1037 /* set decompressed file size */ 1038 vattr.va_size = 1039 ((u_quad_t)vnd->sc_comp_numoffs - 1) * 1040 (u_quad_t)vnd->sc_comp_blksz; 1041 1042 /* allocate space for all the compressed offsets */ 1043 vnd->sc_comp_offsets = 1044 malloc(sizeof(u_int64_t) * vnd->sc_comp_numoffs, 1045 M_DEVBUF, M_WAITOK); 1046 1047 /* read in the offsets */ 1048 error = vn_rdwr(UIO_READ, nd.ni_vp, 1049 (void *)vnd->sc_comp_offsets, 1050 sizeof(u_int64_t) * vnd->sc_comp_numoffs, 1051 sizeof(struct vnd_comp_header), UIO_SYSSPACE, 1052 IO_UNIT|IO_NODELOCKED, l->l_cred, NULL, NULL); 1053 if(error) { 1054 VOP_UNLOCK(nd.ni_vp, 0); 1055 goto close_and_exit; 1056 } 1057 /* 1058 * find largest block size (used for allocation limit). 1059 * Also convert offset to native byte order. 1060 */ 1061 comp_maxsize = 0; 1062 for (i = 0; i < vnd->sc_comp_numoffs - 1; i++) { 1063 vnd->sc_comp_offsets[i] = 1064 be64toh(vnd->sc_comp_offsets[i]); 1065 comp_size = be64toh(vnd->sc_comp_offsets[i + 1]) 1066 - vnd->sc_comp_offsets[i]; 1067 if (comp_size > comp_maxsize) 1068 comp_maxsize = comp_size; 1069 } 1070 vnd->sc_comp_offsets[vnd->sc_comp_numoffs - 1] = 1071 be64toh(vnd->sc_comp_offsets[vnd->sc_comp_numoffs - 1]); 1072 1073 /* create compressed data buffer */ 1074 vnd->sc_comp_buff = malloc(comp_maxsize, 1075 M_DEVBUF, M_WAITOK); 1076 1077 /* create decompressed buffer */ 1078 vnd->sc_comp_decombuf = malloc(vnd->sc_comp_blksz, 1079 M_DEVBUF, M_WAITOK); 1080 vnd->sc_comp_buffblk = -1; 1081 1082 /* Initialize decompress stream */ 1083 bzero(&vnd->sc_comp_stream, sizeof(z_stream)); 1084 vnd->sc_comp_stream.zalloc = vnd_alloc; 1085 vnd->sc_comp_stream.zfree = vnd_free; 1086 error = inflateInit2(&vnd->sc_comp_stream, MAX_WBITS); 1087 if(error) { 1088 if(vnd->sc_comp_stream.msg) 1089 printf("vnd%d: compressed file, %s\n", 1090 unit, vnd->sc_comp_stream.msg); 1091 VOP_UNLOCK(nd.ni_vp, 0); 1092 error = EINVAL; 1093 goto close_and_exit; 1094 } 1095 1096 vnd->sc_flags |= VNF_COMP | VNF_READONLY; 1097 #else /* !VND_COMPRESSION */ 1098 VOP_UNLOCK(nd.ni_vp, 0); 1099 error = EOPNOTSUPP; 1100 goto close_and_exit; 1101 #endif /* VND_COMPRESSION */ 1102 } 1103 1104 VOP_UNLOCK(nd.ni_vp, 0); 1105 vnd->sc_vp = nd.ni_vp; 1106 vnd->sc_size = btodb(vattr.va_size); /* note truncation */ 1107 1108 /* 1109 * Use pseudo-geometry specified. If none was provided, 1110 * use "standard" Adaptec fictitious geometry. 1111 */ 1112 if (vio->vnd_flags & VNDIOF_HASGEOM) { 1113 1114 memcpy(&vnd->sc_geom, &vio->vnd_geom, 1115 sizeof(vio->vnd_geom)); 1116 1117 /* 1118 * Sanity-check the sector size. 1119 * XXX Don't allow secsize < DEV_BSIZE. Should 1120 * XXX we? 1121 */ 1122 if (vnd->sc_geom.vng_secsize < DEV_BSIZE || 1123 (vnd->sc_geom.vng_secsize % DEV_BSIZE) != 0 || 1124 vnd->sc_geom.vng_ncylinders == 0 || 1125 (vnd->sc_geom.vng_ntracks * 1126 vnd->sc_geom.vng_nsectors) == 0) { 1127 error = EINVAL; 1128 goto close_and_exit; 1129 } 1130 1131 /* 1132 * Compute the size (in DEV_BSIZE blocks) specified 1133 * by the geometry. 1134 */ 1135 geomsize = (vnd->sc_geom.vng_nsectors * 1136 vnd->sc_geom.vng_ntracks * 1137 vnd->sc_geom.vng_ncylinders) * 1138 (vnd->sc_geom.vng_secsize / DEV_BSIZE); 1139 1140 /* 1141 * Sanity-check the size against the specified 1142 * geometry. 1143 */ 1144 if (vnd->sc_size < geomsize) { 1145 error = EINVAL; 1146 goto close_and_exit; 1147 } 1148 } else if (vnd->sc_size >= (32 * 64)) { 1149 /* 1150 * Size must be at least 2048 DEV_BSIZE blocks 1151 * (1M) in order to use this geometry. 1152 */ 1153 vnd->sc_geom.vng_secsize = DEV_BSIZE; 1154 vnd->sc_geom.vng_nsectors = 32; 1155 vnd->sc_geom.vng_ntracks = 64; 1156 vnd->sc_geom.vng_ncylinders = vnd->sc_size / (64 * 32); 1157 } else { 1158 vnd->sc_geom.vng_secsize = DEV_BSIZE; 1159 vnd->sc_geom.vng_nsectors = 1; 1160 vnd->sc_geom.vng_ntracks = 1; 1161 vnd->sc_geom.vng_ncylinders = vnd->sc_size; 1162 } 1163 1164 if (vio->vnd_flags & VNDIOF_READONLY) { 1165 vnd->sc_flags |= VNF_READONLY; 1166 } 1167 1168 if ((error = vndsetcred(vnd, l->l_cred)) != 0) 1169 goto close_and_exit; 1170 1171 vndthrottle(vnd, vnd->sc_vp); 1172 vio->vnd_size = dbtob(vnd->sc_size); 1173 vnd->sc_flags |= VNF_INITED; 1174 1175 /* create the kernel thread, wait for it to be up */ 1176 error = kthread_create(PRI_NONE, 0, NULL, vndthread, vnd, 1177 &vnd->sc_kthread, vnd->sc_dev.dv_xname); 1178 if (error) 1179 goto close_and_exit; 1180 while ((vnd->sc_flags & VNF_KTHREAD) == 0) { 1181 tsleep(&vnd->sc_kthread, PRIBIO, "vndthr", 0); 1182 } 1183 #ifdef DEBUG 1184 if (vnddebug & VDB_INIT) 1185 printf("vndioctl: SET vp %p size 0x%lx %d/%d/%d/%d\n", 1186 vnd->sc_vp, (unsigned long) vnd->sc_size, 1187 vnd->sc_geom.vng_secsize, 1188 vnd->sc_geom.vng_nsectors, 1189 vnd->sc_geom.vng_ntracks, 1190 vnd->sc_geom.vng_ncylinders); 1191 #endif 1192 1193 /* Attach the disk. */ 1194 disk_attach(&vnd->sc_dkdev); 1195 1196 /* Initialize the xfer and buffer pools. */ 1197 pool_init(&vnd->sc_vxpool, sizeof(struct vndxfer), 0, 1198 0, 0, "vndxpl", NULL, IPL_BIO); 1199 1200 /* Try and read the disklabel. */ 1201 vndgetdisklabel(dev, vnd); 1202 1203 vndunlock(vnd); 1204 1205 break; 1206 1207 close_and_exit: 1208 (void) vn_close(nd.ni_vp, fflags, l->l_cred, l); 1209 unlock_and_exit: 1210 #ifdef VND_COMPRESSION 1211 /* free any allocated memory (for compressed file) */ 1212 if(vnd->sc_comp_offsets) { 1213 free(vnd->sc_comp_offsets, M_DEVBUF); 1214 vnd->sc_comp_offsets = NULL; 1215 } 1216 if(vnd->sc_comp_buff) { 1217 free(vnd->sc_comp_buff, M_DEVBUF); 1218 vnd->sc_comp_buff = NULL; 1219 } 1220 if(vnd->sc_comp_decombuf) { 1221 free(vnd->sc_comp_decombuf, M_DEVBUF); 1222 vnd->sc_comp_decombuf = NULL; 1223 } 1224 #endif /* VND_COMPRESSION */ 1225 vndunlock(vnd); 1226 return (error); 1227 1228 case VNDIOCCLR: 1229 if ((error = vndlock(vnd)) != 0) 1230 return (error); 1231 1232 /* 1233 * Don't unconfigure if any other partitions are open 1234 * or if both the character and block flavors of this 1235 * partition are open. 1236 */ 1237 part = DISKPART(dev); 1238 pmask = (1 << part); 1239 if (((vnd->sc_dkdev.dk_openmask & ~pmask) || 1240 ((vnd->sc_dkdev.dk_bopenmask & pmask) && 1241 (vnd->sc_dkdev.dk_copenmask & pmask))) && 1242 !(vio->vnd_flags & VNDIOF_FORCE)) { 1243 vndunlock(vnd); 1244 return (EBUSY); 1245 } 1246 1247 /* 1248 * XXX vndclear() might call vndclose() implicitely; 1249 * release lock to avoid recursion 1250 */ 1251 vndunlock(vnd); 1252 vndclear(vnd, minor(dev)); 1253 #ifdef DEBUG 1254 if (vnddebug & VDB_INIT) 1255 printf("vndioctl: CLRed\n"); 1256 #endif 1257 1258 /* Destroy the xfer and buffer pools. */ 1259 pool_destroy(&vnd->sc_vxpool); 1260 1261 /* Detatch the disk. */ 1262 disk_detach(&vnd->sc_dkdev); 1263 break; 1264 1265 #ifdef COMPAT_30 1266 case VNDIOOCGET: { 1267 struct vnd_ouser *vnu; 1268 struct vattr va; 1269 vnu = (struct vnd_ouser *)data; 1270 KASSERT(l); 1271 switch (error = vnd_cget(l, unit, &vnu->vnu_unit, &va)) { 1272 case 0: 1273 vnu->vnu_dev = va.va_fsid; 1274 vnu->vnu_ino = va.va_fileid; 1275 break; 1276 case -1: 1277 /* unused is not an error */ 1278 vnu->vnu_dev = 0; 1279 vnu->vnu_ino = 0; 1280 break; 1281 default: 1282 return error; 1283 } 1284 break; 1285 } 1286 #endif 1287 case VNDIOCGET: { 1288 struct vnd_user *vnu; 1289 struct vattr va; 1290 vnu = (struct vnd_user *)data; 1291 KASSERT(l); 1292 switch (error = vnd_cget(l, unit, &vnu->vnu_unit, &va)) { 1293 case 0: 1294 vnu->vnu_dev = va.va_fsid; 1295 vnu->vnu_ino = va.va_fileid; 1296 break; 1297 case -1: 1298 /* unused is not an error */ 1299 vnu->vnu_dev = 0; 1300 vnu->vnu_ino = 0; 1301 break; 1302 default: 1303 return error; 1304 } 1305 break; 1306 } 1307 1308 case DIOCGDINFO: 1309 *(struct disklabel *)data = *(vnd->sc_dkdev.dk_label); 1310 break; 1311 1312 #ifdef __HAVE_OLD_DISKLABEL 1313 case ODIOCGDINFO: 1314 newlabel = *(vnd->sc_dkdev.dk_label); 1315 if (newlabel.d_npartitions > OLDMAXPARTITIONS) 1316 return ENOTTY; 1317 memcpy(data, &newlabel, sizeof (struct olddisklabel)); 1318 break; 1319 #endif 1320 1321 case DIOCGPART: 1322 ((struct partinfo *)data)->disklab = vnd->sc_dkdev.dk_label; 1323 ((struct partinfo *)data)->part = 1324 &vnd->sc_dkdev.dk_label->d_partitions[DISKPART(dev)]; 1325 break; 1326 1327 case DIOCWDINFO: 1328 case DIOCSDINFO: 1329 #ifdef __HAVE_OLD_DISKLABEL 1330 case ODIOCWDINFO: 1331 case ODIOCSDINFO: 1332 #endif 1333 { 1334 struct disklabel *lp; 1335 1336 if ((error = vndlock(vnd)) != 0) 1337 return (error); 1338 1339 vnd->sc_flags |= VNF_LABELLING; 1340 1341 #ifdef __HAVE_OLD_DISKLABEL 1342 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) { 1343 memset(&newlabel, 0, sizeof newlabel); 1344 memcpy(&newlabel, data, sizeof (struct olddisklabel)); 1345 lp = &newlabel; 1346 } else 1347 #endif 1348 lp = (struct disklabel *)data; 1349 1350 error = setdisklabel(vnd->sc_dkdev.dk_label, 1351 lp, 0, vnd->sc_dkdev.dk_cpulabel); 1352 if (error == 0) { 1353 if (cmd == DIOCWDINFO 1354 #ifdef __HAVE_OLD_DISKLABEL 1355 || cmd == ODIOCWDINFO 1356 #endif 1357 ) 1358 error = writedisklabel(VNDLABELDEV(dev), 1359 vndstrategy, vnd->sc_dkdev.dk_label, 1360 vnd->sc_dkdev.dk_cpulabel); 1361 } 1362 1363 vnd->sc_flags &= ~VNF_LABELLING; 1364 1365 vndunlock(vnd); 1366 1367 if (error) 1368 return (error); 1369 break; 1370 } 1371 1372 case DIOCKLABEL: 1373 if (*(int *)data != 0) 1374 vnd->sc_flags |= VNF_KLABEL; 1375 else 1376 vnd->sc_flags &= ~VNF_KLABEL; 1377 break; 1378 1379 case DIOCWLABEL: 1380 if (*(int *)data != 0) 1381 vnd->sc_flags |= VNF_WLABEL; 1382 else 1383 vnd->sc_flags &= ~VNF_WLABEL; 1384 break; 1385 1386 case DIOCGDEFLABEL: 1387 vndgetdefaultlabel(vnd, (struct disklabel *)data); 1388 break; 1389 1390 #ifdef __HAVE_OLD_DISKLABEL 1391 case ODIOCGDEFLABEL: 1392 vndgetdefaultlabel(vnd, &newlabel); 1393 if (newlabel.d_npartitions > OLDMAXPARTITIONS) 1394 return ENOTTY; 1395 memcpy(data, &newlabel, sizeof (struct olddisklabel)); 1396 break; 1397 #endif 1398 1399 default: 1400 return (ENOTTY); 1401 } 1402 1403 return (0); 1404 } 1405 1406 /* 1407 * Duplicate the current processes' credentials. Since we are called only 1408 * as the result of a SET ioctl and only root can do that, any future access 1409 * to this "disk" is essentially as root. Note that credentials may change 1410 * if some other uid can write directly to the mapped file (NFS). 1411 */ 1412 static int 1413 vndsetcred(struct vnd_softc *vnd, kauth_cred_t cred) 1414 { 1415 struct uio auio; 1416 struct iovec aiov; 1417 char *tmpbuf; 1418 int error; 1419 1420 vnd->sc_cred = kauth_cred_dup(cred); 1421 tmpbuf = malloc(DEV_BSIZE, M_TEMP, M_WAITOK); 1422 1423 /* XXX: Horrible kludge to establish credentials for NFS */ 1424 aiov.iov_base = tmpbuf; 1425 aiov.iov_len = min(DEV_BSIZE, dbtob(vnd->sc_size)); 1426 auio.uio_iov = &aiov; 1427 auio.uio_iovcnt = 1; 1428 auio.uio_offset = 0; 1429 auio.uio_rw = UIO_READ; 1430 auio.uio_resid = aiov.iov_len; 1431 UIO_SETUP_SYSSPACE(&auio); 1432 vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY); 1433 error = VOP_READ(vnd->sc_vp, &auio, 0, vnd->sc_cred); 1434 if (error == 0) { 1435 /* 1436 * Because vnd does all IO directly through the vnode 1437 * we need to flush (at least) the buffer from the above 1438 * VOP_READ from the buffer cache to prevent cache 1439 * incoherencies. Also, be careful to write dirty 1440 * buffers back to stable storage. 1441 */ 1442 error = vinvalbuf(vnd->sc_vp, V_SAVE, vnd->sc_cred, 1443 curlwp, 0, 0); 1444 } 1445 VOP_UNLOCK(vnd->sc_vp, 0); 1446 1447 free(tmpbuf, M_TEMP); 1448 return (error); 1449 } 1450 1451 /* 1452 * Set maxactive based on FS type 1453 */ 1454 static void 1455 vndthrottle(struct vnd_softc *vnd, struct vnode *vp) 1456 { 1457 #ifdef NFS 1458 extern int (**nfsv2_vnodeop_p)(void *); 1459 1460 if (vp->v_op == nfsv2_vnodeop_p) 1461 vnd->sc_maxactive = 2; 1462 else 1463 #endif 1464 vnd->sc_maxactive = 8; 1465 1466 if (vnd->sc_maxactive < 1) 1467 vnd->sc_maxactive = 1; 1468 } 1469 1470 #if 0 1471 static void 1472 vndshutdown(void) 1473 { 1474 struct vnd_softc *vnd; 1475 1476 for (vnd = &vnd_softc[0]; vnd < &vnd_softc[numvnd]; vnd++) 1477 if (vnd->sc_flags & VNF_INITED) 1478 vndclear(vnd); 1479 } 1480 #endif 1481 1482 static void 1483 vndclear(struct vnd_softc *vnd, int myminor) 1484 { 1485 struct vnode *vp = vnd->sc_vp; 1486 struct lwp *l = curlwp; 1487 int fflags = FREAD; 1488 int bmaj, cmaj, i, mn; 1489 int s; 1490 1491 #ifdef DEBUG 1492 if (vnddebug & VDB_FOLLOW) 1493 printf("vndclear(%p): vp %p\n", vnd, vp); 1494 #endif 1495 /* locate the major number */ 1496 bmaj = bdevsw_lookup_major(&vnd_bdevsw); 1497 cmaj = cdevsw_lookup_major(&vnd_cdevsw); 1498 1499 /* Nuke the vnodes for any open instances */ 1500 for (i = 0; i < MAXPARTITIONS; i++) { 1501 mn = DISKMINOR(device_unit(&vnd->sc_dev), i); 1502 vdevgone(bmaj, mn, mn, VBLK); 1503 if (mn != myminor) /* XXX avoid to kill own vnode */ 1504 vdevgone(cmaj, mn, mn, VCHR); 1505 } 1506 1507 if ((vnd->sc_flags & VNF_READONLY) == 0) 1508 fflags |= FWRITE; 1509 1510 s = splbio(); 1511 bufq_drain(vnd->sc_tab); 1512 splx(s); 1513 1514 vnd->sc_flags |= VNF_VUNCONF; 1515 wakeup(&vnd->sc_tab); 1516 while (vnd->sc_flags & VNF_KTHREAD) 1517 tsleep(&vnd->sc_kthread, PRIBIO, "vnthr", 0); 1518 1519 #ifdef VND_COMPRESSION 1520 /* free the compressed file buffers */ 1521 if(vnd->sc_flags & VNF_COMP) { 1522 if(vnd->sc_comp_offsets) { 1523 free(vnd->sc_comp_offsets, M_DEVBUF); 1524 vnd->sc_comp_offsets = NULL; 1525 } 1526 if(vnd->sc_comp_buff) { 1527 free(vnd->sc_comp_buff, M_DEVBUF); 1528 vnd->sc_comp_buff = NULL; 1529 } 1530 if(vnd->sc_comp_decombuf) { 1531 free(vnd->sc_comp_decombuf, M_DEVBUF); 1532 vnd->sc_comp_decombuf = NULL; 1533 } 1534 } 1535 #endif /* VND_COMPRESSION */ 1536 vnd->sc_flags &= 1537 ~(VNF_INITED | VNF_READONLY | VNF_VLABEL 1538 | VNF_VUNCONF | VNF_COMP); 1539 if (vp == (struct vnode *)0) 1540 panic("vndclear: null vp"); 1541 (void) vn_close(vp, fflags, vnd->sc_cred, l); 1542 kauth_cred_free(vnd->sc_cred); 1543 vnd->sc_vp = (struct vnode *)0; 1544 vnd->sc_cred = (kauth_cred_t)0; 1545 vnd->sc_size = 0; 1546 } 1547 1548 static int 1549 vndsize(dev_t dev) 1550 { 1551 struct vnd_softc *sc; 1552 struct disklabel *lp; 1553 int part, unit, omask; 1554 int size; 1555 1556 unit = vndunit(dev); 1557 sc = (struct vnd_softc *)device_lookup(&vnd_cd, unit); 1558 if (sc == NULL) 1559 return -1; 1560 1561 if ((sc->sc_flags & VNF_INITED) == 0) 1562 return (-1); 1563 1564 part = DISKPART(dev); 1565 omask = sc->sc_dkdev.dk_openmask & (1 << part); 1566 lp = sc->sc_dkdev.dk_label; 1567 1568 if (omask == 0 && vndopen(dev, 0, S_IFBLK, curlwp)) /* XXX */ 1569 return (-1); 1570 1571 if (lp->d_partitions[part].p_fstype != FS_SWAP) 1572 size = -1; 1573 else 1574 size = lp->d_partitions[part].p_size * 1575 (lp->d_secsize / DEV_BSIZE); 1576 1577 if (omask == 0 && vndclose(dev, 0, S_IFBLK, curlwp)) /* XXX */ 1578 return (-1); 1579 1580 return (size); 1581 } 1582 1583 static int 1584 vnddump(dev_t dev, daddr_t blkno, void *va, 1585 size_t size) 1586 { 1587 1588 /* Not implemented. */ 1589 return ENXIO; 1590 } 1591 1592 static void 1593 vndgetdefaultlabel(struct vnd_softc *sc, struct disklabel *lp) 1594 { 1595 struct vndgeom *vng = &sc->sc_geom; 1596 struct partition *pp; 1597 1598 memset(lp, 0, sizeof(*lp)); 1599 1600 lp->d_secperunit = sc->sc_size / (vng->vng_secsize / DEV_BSIZE); 1601 lp->d_secsize = vng->vng_secsize; 1602 lp->d_nsectors = vng->vng_nsectors; 1603 lp->d_ntracks = vng->vng_ntracks; 1604 lp->d_ncylinders = vng->vng_ncylinders; 1605 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors; 1606 1607 strncpy(lp->d_typename, "vnd", sizeof(lp->d_typename)); 1608 lp->d_type = DTYPE_VND; 1609 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname)); 1610 lp->d_rpm = 3600; 1611 lp->d_interleave = 1; 1612 lp->d_flags = 0; 1613 1614 pp = &lp->d_partitions[RAW_PART]; 1615 pp->p_offset = 0; 1616 pp->p_size = lp->d_secperunit; 1617 pp->p_fstype = FS_UNUSED; 1618 lp->d_npartitions = RAW_PART + 1; 1619 1620 lp->d_magic = DISKMAGIC; 1621 lp->d_magic2 = DISKMAGIC; 1622 lp->d_checksum = dkcksum(lp); 1623 } 1624 1625 /* 1626 * Read the disklabel from a vnd. If one is not present, create a fake one. 1627 */ 1628 static void 1629 vndgetdisklabel(dev_t dev, struct vnd_softc *sc) 1630 { 1631 const char *errstring; 1632 struct disklabel *lp = sc->sc_dkdev.dk_label; 1633 struct cpu_disklabel *clp = sc->sc_dkdev.dk_cpulabel; 1634 int i; 1635 1636 memset(clp, 0, sizeof(*clp)); 1637 1638 vndgetdefaultlabel(sc, lp); 1639 1640 /* 1641 * Call the generic disklabel extraction routine. 1642 */ 1643 errstring = readdisklabel(VNDLABELDEV(dev), vndstrategy, lp, clp); 1644 if (errstring) { 1645 /* 1646 * Lack of disklabel is common, but we print the warning 1647 * anyway, since it might contain other useful information. 1648 */ 1649 printf("%s: %s\n", sc->sc_dev.dv_xname, errstring); 1650 1651 /* 1652 * For historical reasons, if there's no disklabel 1653 * present, all partitions must be FS_BSDFFS and 1654 * occupy the entire disk. 1655 */ 1656 for (i = 0; i < MAXPARTITIONS; i++) { 1657 /* 1658 * Don't wipe out port specific hack (such as 1659 * dos partition hack of i386 port). 1660 */ 1661 if (lp->d_partitions[i].p_size != 0) 1662 continue; 1663 1664 lp->d_partitions[i].p_size = lp->d_secperunit; 1665 lp->d_partitions[i].p_offset = 0; 1666 lp->d_partitions[i].p_fstype = FS_BSDFFS; 1667 } 1668 1669 strncpy(lp->d_packname, "default label", 1670 sizeof(lp->d_packname)); 1671 1672 lp->d_npartitions = MAXPARTITIONS; 1673 lp->d_checksum = dkcksum(lp); 1674 } 1675 1676 /* In-core label now valid. */ 1677 sc->sc_flags |= VNF_VLABEL; 1678 } 1679 1680 /* 1681 * Wait interruptibly for an exclusive lock. 1682 * 1683 * XXX 1684 * Several drivers do this; it should be abstracted and made MP-safe. 1685 */ 1686 static int 1687 vndlock(struct vnd_softc *sc) 1688 { 1689 int error; 1690 1691 while ((sc->sc_flags & VNF_LOCKED) != 0) { 1692 sc->sc_flags |= VNF_WANTED; 1693 if ((error = tsleep(sc, PRIBIO | PCATCH, "vndlck", 0)) != 0) 1694 return (error); 1695 } 1696 sc->sc_flags |= VNF_LOCKED; 1697 return (0); 1698 } 1699 1700 /* 1701 * Unlock and wake up any waiters. 1702 */ 1703 static void 1704 vndunlock(struct vnd_softc *sc) 1705 { 1706 1707 sc->sc_flags &= ~VNF_LOCKED; 1708 if ((sc->sc_flags & VNF_WANTED) != 0) { 1709 sc->sc_flags &= ~VNF_WANTED; 1710 wakeup(sc); 1711 } 1712 } 1713 1714 #ifdef VND_COMPRESSION 1715 /* compressed file read */ 1716 static void 1717 compstrategy(struct buf *bp, off_t bn) 1718 { 1719 int error; 1720 int unit = vndunit(bp->b_dev); 1721 struct vnd_softc *vnd = 1722 (struct vnd_softc *)device_lookup(&vnd_cd, unit); 1723 u_int32_t comp_block; 1724 struct uio auio; 1725 char *addr; 1726 int s; 1727 1728 /* set up constants for data move */ 1729 auio.uio_rw = UIO_READ; 1730 UIO_SETUP_SYSSPACE(&auio); 1731 1732 /* read, and transfer the data */ 1733 addr = bp->b_data; 1734 bp->b_resid = bp->b_bcount; 1735 s = splbio(); 1736 while (bp->b_resid > 0) { 1737 unsigned length; 1738 size_t length_in_buffer; 1739 u_int32_t offset_in_buffer; 1740 struct iovec aiov; 1741 1742 /* calculate the compressed block number */ 1743 comp_block = bn / (off_t)vnd->sc_comp_blksz; 1744 1745 /* check for good block number */ 1746 if (comp_block >= vnd->sc_comp_numoffs) { 1747 bp->b_error = EINVAL; 1748 splx(s); 1749 return; 1750 } 1751 1752 /* read in the compressed block, if not in buffer */ 1753 if (comp_block != vnd->sc_comp_buffblk) { 1754 length = vnd->sc_comp_offsets[comp_block + 1] - 1755 vnd->sc_comp_offsets[comp_block]; 1756 vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY); 1757 error = vn_rdwr(UIO_READ, vnd->sc_vp, vnd->sc_comp_buff, 1758 length, vnd->sc_comp_offsets[comp_block], 1759 UIO_SYSSPACE, IO_UNIT, vnd->sc_cred, NULL, NULL); 1760 if (error) { 1761 bp->b_error = error; 1762 VOP_UNLOCK(vnd->sc_vp, 0); 1763 splx(s); 1764 return; 1765 } 1766 /* uncompress the buffer */ 1767 vnd->sc_comp_stream.next_in = vnd->sc_comp_buff; 1768 vnd->sc_comp_stream.avail_in = length; 1769 vnd->sc_comp_stream.next_out = vnd->sc_comp_decombuf; 1770 vnd->sc_comp_stream.avail_out = vnd->sc_comp_blksz; 1771 inflateReset(&vnd->sc_comp_stream); 1772 error = inflate(&vnd->sc_comp_stream, Z_FINISH); 1773 if (error != Z_STREAM_END) { 1774 if (vnd->sc_comp_stream.msg) 1775 printf("%s: compressed file, %s\n", 1776 vnd->sc_dev.dv_xname, 1777 vnd->sc_comp_stream.msg); 1778 bp->b_error = EBADMSG; 1779 VOP_UNLOCK(vnd->sc_vp, 0); 1780 splx(s); 1781 return; 1782 } 1783 vnd->sc_comp_buffblk = comp_block; 1784 VOP_UNLOCK(vnd->sc_vp, 0); 1785 } 1786 1787 /* transfer the usable uncompressed data */ 1788 offset_in_buffer = bn % (off_t)vnd->sc_comp_blksz; 1789 length_in_buffer = vnd->sc_comp_blksz - offset_in_buffer; 1790 if (length_in_buffer > bp->b_resid) 1791 length_in_buffer = bp->b_resid; 1792 auio.uio_iov = &aiov; 1793 auio.uio_iovcnt = 1; 1794 aiov.iov_base = addr; 1795 aiov.iov_len = length_in_buffer; 1796 auio.uio_resid = aiov.iov_len; 1797 auio.uio_offset = 0; 1798 error = uiomove(vnd->sc_comp_decombuf + offset_in_buffer, 1799 length_in_buffer, &auio); 1800 if (error) { 1801 bp->b_error = error; 1802 splx(s); 1803 return; 1804 } 1805 1806 bn += length_in_buffer; 1807 addr += length_in_buffer; 1808 bp->b_resid -= length_in_buffer; 1809 } 1810 splx(s); 1811 } 1812 1813 /* compression memory allocation routines */ 1814 static void * 1815 vnd_alloc(void *aux, u_int items, u_int siz) 1816 { 1817 return malloc(items * siz, M_TEMP, M_NOWAIT); 1818 } 1819 1820 static void 1821 vnd_free(void *aux, void *ptr) 1822 { 1823 free(ptr, M_TEMP); 1824 } 1825 #endif /* VND_COMPRESSION */ 1826