/*	$NetBSD: vnd.c,v 1.175 2008/01/02 11:48:36 ad Exp $	*/

/*-
 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *        This product includes software developed by the NetBSD
 *        Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.
 * IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vn.c 1.13 94/04/02$
 *
 *	@(#)vn.c	8.9 (Berkeley) 5/14/95
 */

/*
 * Copyright (c) 1988 University of Utah.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vn.c 1.13 94/04/02$
 *
 *	@(#)vn.c	8.9 (Berkeley) 5/14/95
 */

/*
 * Vnode disk driver.
 *
 * Block/character interface to a vnode.  Allows one to treat a file
 * as a disk (e.g. build a filesystem in it, mount it, etc.).
 *
 * NOTE 1: If the vnode supports the VOP_BMAP and VOP_STRATEGY operations,
 * this uses them to avoid distorting the local buffer cache.  If those
 * block-level operations are not available, this falls back to the regular
 * read and write calls.  Using them may distort the cache in some cases,
 * but it is better to have the driver working than to prevent it from
 * working on file systems where the block-level operations are not
 * implemented for whatever reason.
 *
 * NOTE 2: There is a security issue involved with this driver.
 * Once mounted, all access to the contents of the "mapped" file via
 * the special file is controlled by the permissions on the special
 * file; the protection of the mapped file is ignored (effectively,
 * root credentials are used in all transactions).
 *
 * NOTE 3: Doesn't interact with leases; should it?
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vnd.c,v 1.175 2008/01/02 11:48:36 ad Exp $");

#if defined(_KERNEL_OPT)
#include "fs_nfs.h"
#include "opt_vnd.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/proc.h>
#include <sys/kthread.h>
#include <sys/errno.h>
#include <sys/buf.h>
#include <sys/bufq.h>
#include <sys/malloc.h>
#include <sys/ioctl.h>
#include <sys/disklabel.h>
#include <sys/device.h>
#include <sys/disk.h>
#include <sys/stat.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/uio.h>
#include <sys/conf.h>
#include <sys/kauth.h>

#include <net/zlib.h>

#include <miscfs/genfs/genfs.h>
#include <miscfs/specfs/specdev.h>

#include <dev/vndvar.h>

#include <prop/proplib.h>

#if defined(VNDDEBUG) && !defined(DEBUG)
#define DEBUG
#endif

#ifdef DEBUG
int dovndcluster = 1;
#define VDB_FOLLOW	0x01
#define VDB_INIT	0x02
#define VDB_IO		0x04
#define VDB_LABEL	0x08
int vnddebug = 0x00;
#endif

#define vndunit(x)	DISKUNIT(x)

struct vndxfer {
	struct buf vx_buf;
	struct vnd_softc *vx_vnd;
};
#define	VND_BUFTOXFER(bp)	((struct vndxfer *)(void *)bp)

#define VND_GETXFER(vnd)	pool_get(&(vnd)->sc_vxpool, PR_WAITOK)
#define VND_PUTXFER(vnd, vx)	pool_put(&(vnd)->sc_vxpool, (vx))

#define VNDLABELDEV(dev) \
	(MAKEDISKDEV(major((dev)), vndunit((dev)), RAW_PART))

/* called by main() at boot time (XXX: and the LKM driver) */
void	vndattach(int);

static void	vndclear(struct vnd_softc *, int);
static int	vndsetcred(struct vnd_softc *, kauth_cred_t);
static void	vndthrottle(struct vnd_softc *, struct vnode *);
static void	vndiodone(struct buf *);
#if 0
static void	vndshutdown(void);
#endif

static void	vndgetdefaultlabel(struct vnd_softc *, struct disklabel *);
static void	vndgetdisklabel(dev_t, struct vnd_softc *);

static int	vndlock(struct vnd_softc *);
static void	vndunlock(struct vnd_softc *);
#ifdef VND_COMPRESSION
static void	compstrategy(struct buf *, off_t);
static void	*vnd_alloc(void *, u_int, u_int);
static void	vnd_free(void *, void *);
#endif /* VND_COMPRESSION */

static void	vndthread(void *);
static bool	vnode_has_op(const struct vnode *, int);
static void	handle_with_rdwr(struct vnd_softc *, const struct buf *,
		    struct buf *);
static void	handle_with_strategy(struct vnd_softc *, const struct buf *,
		    struct buf *);
static void	vnd_set_properties(struct vnd_softc *);

static dev_type_open(vndopen);
static dev_type_close(vndclose);
static dev_type_read(vndread);
static dev_type_write(vndwrite);
static dev_type_ioctl(vndioctl);
static dev_type_strategy(vndstrategy);
static dev_type_dump(vnddump);
static dev_type_size(vndsize);

const struct bdevsw vnd_bdevsw = {
	vndopen, vndclose, vndstrategy, vndioctl, vnddump, vndsize, D_DISK
};

const struct cdevsw vnd_cdevsw = {
	vndopen, vndclose, vndread, vndwrite, vndioctl,
	nostop, notty, nopoll, nommap, nokqfilter, D_DISK
};

static int	vnd_match(struct device *, struct cfdata *, void *);
static void	vnd_attach(struct device *, struct device *, void *);
static int	vnd_detach(struct device *, int);

CFATTACH_DECL(vnd, sizeof(struct vnd_softc),
    vnd_match, vnd_attach, vnd_detach, NULL);
extern struct cfdriver vnd_cd;

static struct vnd_softc	*vnd_spawn(int);
int	vnd_destroy(struct device *);

void
vndattach(int num)
{
	int error;

	error = config_cfattach_attach(vnd_cd.cd_name, &vnd_ca);
	if (error)
		aprint_error("%s: unable to register cfattach\n",
		    vnd_cd.cd_name);
}

static int
vnd_match(struct device *self, struct cfdata *cfdata,
    void *aux)
{
	return 1;
}

static void
vnd_attach(struct device *parent, struct device *self,
    void *aux)
{
	struct vnd_softc *sc = (struct vnd_softc *)self;

	sc->sc_comp_offsets = NULL;
	sc->sc_comp_buff = NULL;
	sc->sc_comp_decombuf = NULL;
	bufq_alloc(&sc->sc_tab, "disksort", BUFQ_SORT_RAWBLOCK);
	disk_init(&sc->sc_dkdev, self->dv_xname, NULL);
	if (!pmf_device_register(self, NULL, NULL))
		aprint_error_dev(self, "couldn't establish power handler\n");
}

static int
vnd_detach(struct device *self, int flags)
{
	struct vnd_softc *sc = (struct vnd_softc *)self;

	if (sc->sc_flags & VNF_INITED)
		return EBUSY;

	pmf_device_deregister(self);
	bufq_free(sc->sc_tab);
	disk_destroy(&sc->sc_dkdev);

	return 0;
}

static struct vnd_softc *
vnd_spawn(int unit)
{
	struct cfdata *cf;

	cf = malloc(sizeof(*cf), M_DEVBUF, M_WAITOK);
	cf->cf_name = vnd_cd.cd_name;
	cf->cf_atname = vnd_cd.cd_name;
	cf->cf_unit = unit;
	cf->cf_fstate = FSTATE_STAR;

	return (struct vnd_softc *)config_attach_pseudo(cf);
}

int
vnd_destroy(struct device *dev)
{
	int error;
	struct cfdata *cf;

	cf = device_cfdata(dev);
	error = config_detach(dev, DETACH_QUIET);
	if (error)
		return error;
	free(cf, M_DEVBUF);
	return 0;
}

static int
vndopen(dev_t dev, int flags, int mode, struct lwp *l)
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;
	int error = 0, part, pmask;
	struct disklabel *lp;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndopen(0x%x, 0x%x, 0x%x, %p)\n", dev, flags, mode, l);
#endif
	sc = device_lookup(&vnd_cd, unit);
	if (sc == NULL) {
		sc = vnd_spawn(unit);
		if (sc == NULL)
			return ENOMEM;
	}

	if ((error = vndlock(sc)) != 0)
		return (error);

	lp = sc->sc_dkdev.dk_label;

	part = DISKPART(dev);
	pmask = (1 << part);

	/*
	 * If we're initialized, check to see if there are any other
	 * open partitions.  If not, then it's safe to update the
	 * in-core disklabel.  Only read the disklabel if it is
	 * not already valid.
	 */
	if ((sc->sc_flags & (VNF_INITED|VNF_VLABEL)) == VNF_INITED &&
	    sc->sc_dkdev.dk_openmask == 0)
		vndgetdisklabel(dev, sc);

	/* Check that the partition exists. */
	if (part != RAW_PART) {
		if (((sc->sc_flags & VNF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
		     (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			goto done;
		}
	}

	/* Prevent our unit from being unconfigured while open. */
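	/*
	 * Example of the bookkeeping below: for partition 2, pmask is
	 * 1 << 2 == 0x04.  The character and block open masks are kept
	 * separately and OR'd together into dk_openmask.
	 */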
	switch (mode) {
	case S_IFCHR:
		sc->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		sc->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}
	sc->sc_dkdev.dk_openmask =
	    sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask;

 done:
	vndunlock(sc);
	return (error);
}

static int
vndclose(dev_t dev, int flags, int mode, struct lwp *l)
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;
	int error = 0, part;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndclose(0x%x, 0x%x, 0x%x, %p)\n", dev, flags, mode, l);
#endif
	sc = device_lookup(&vnd_cd, unit);
	if (sc == NULL)
		return ENXIO;

	if ((error = vndlock(sc)) != 0)
		return (error);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (mode) {
	case S_IFCHR:
		sc->sc_dkdev.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		sc->sc_dkdev.dk_bopenmask &= ~(1 << part);
		break;
	}
	sc->sc_dkdev.dk_openmask =
	    sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask;

	vndunlock(sc);

	if ((sc->sc_flags & VNF_INITED) == 0) {
		if ((error = vnd_destroy((struct device *)sc)) != 0) {
			aprint_error("%s: unable to detach instance\n",
			    sc->sc_dev.dv_xname);
			return error;
		}
	}

	return (0);
}

/*
 * Queue the request, and wakeup the kernel thread to handle it.
 */
static void
vndstrategy(struct buf *bp)
{
	int unit = vndunit(bp->b_dev);
	struct vnd_softc *vnd =
	    (struct vnd_softc *)device_lookup(&vnd_cd, unit);
	struct disklabel *lp = vnd->sc_dkdev.dk_label;
	daddr_t blkno;
	int s = splbio();

	if ((vnd->sc_flags & VNF_INITED) == 0) {
		bp->b_error = ENXIO;
		goto done;
	}

	/*
	 * The transfer must be a whole number of blocks.
	 */
	if ((bp->b_bcount % lp->d_secsize) != 0) {
		bp->b_error = EINVAL;
		goto done;
	}

	/*
	 * Check if we're read-only.
	 */
	if ((vnd->sc_flags & VNF_READONLY) && !(bp->b_flags & B_READ)) {
		bp->b_error = EACCES;
		goto done;
	}

	/* If it's a nil transfer, wake up the top half now. */
	if (bp->b_bcount == 0) {
		goto done;
	}

	/*
	 * Do bounds checking and adjust transfer.  If there's an error,
	 * the bounds check will flag that for us.
	 */
	if (DISKPART(bp->b_dev) == RAW_PART) {
		if (bounds_check_with_mediasize(bp, DEV_BSIZE,
		    vnd->sc_size) <= 0)
			goto done;
	} else {
		if (bounds_check_with_label(&vnd->sc_dkdev,
		    bp, vnd->sc_flags & (VNF_WLABEL|VNF_LABELLING)) <= 0)
			goto done;
	}

	/*
	 * Put the block number in terms of the logical blocksize
	 * of the "device".
	 */

	blkno = bp->b_blkno / (lp->d_secsize / DEV_BSIZE);

	/*
	 * Translate the partition-relative block number to an absolute.
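	 * For example, a request for block 0 of a partition whose
	 * p_offset is 63 becomes absolute block 63 of the underlying file.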
	 */
	if (DISKPART(bp->b_dev) != RAW_PART) {
		struct partition *pp;

		pp = &vnd->sc_dkdev.dk_label->d_partitions[
		    DISKPART(bp->b_dev)];
		blkno += pp->p_offset;
	}
	bp->b_rawblkno = blkno;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndstrategy(%p): unit %d\n", bp, unit);
#endif
	BUFQ_PUT(vnd->sc_tab, bp);
	wakeup(&vnd->sc_tab);
	splx(s);
	return;

 done:
	bp->b_resid = bp->b_bcount;
	biodone(bp);
	splx(s);
}

static void
vndthread(void *arg)
{
	struct vnd_softc *vnd = arg;
	bool usestrategy;
	int s;

	/* Determine whether we can use VOP_BMAP and VOP_STRATEGY to
	 * directly access the backing vnode.  If we can, use these two
	 * operations to avoid messing with the local buffer cache.
	 * Otherwise fall back to regular VOP_READ/VOP_WRITE operations
	 * which are guaranteed to work with any file system. */
	usestrategy = vnode_has_op(vnd->sc_vp, VOFFSET(vop_bmap)) &&
	    vnode_has_op(vnd->sc_vp, VOFFSET(vop_strategy));

#ifdef DEBUG
	if (vnddebug & VDB_INIT)
		printf("vndthread: vp %p, %s\n", vnd->sc_vp,
		    usestrategy ?
		    "using bmap/strategy operations" :
		    "using read/write operations");
#endif

	s = splbio();
	vnd->sc_flags |= VNF_KTHREAD;
	wakeup(&vnd->sc_kthread);

	/*
	 * Dequeue requests and serve them depending on the available
	 * vnode operations.
	 */
	while ((vnd->sc_flags & VNF_VUNCONF) == 0) {
		struct vndxfer *vnx;
		int flags;
		struct buf *obp;
		struct buf *bp;

		obp = BUFQ_GET(vnd->sc_tab);
		if (obp == NULL) {
			tsleep(&vnd->sc_tab, PRIBIO, "vndbp", 0);
			continue;
		}
		splx(s);
		flags = obp->b_flags;
#ifdef DEBUG
		if (vnddebug & VDB_FOLLOW)
			printf("vndthread(%p)\n", obp);
#endif

		if (vnd->sc_vp->v_mount == NULL) {
			obp->b_error = ENXIO;
			goto done;
		}
#ifdef VND_COMPRESSION
		/* handle a compressed read */
		if ((flags & B_READ) != 0 && (vnd->sc_flags & VNF_COMP)) {
			off_t bn;

			/* Convert to a byte offset within the file. */
			bn = obp->b_rawblkno *
			    vnd->sc_dkdev.dk_label->d_secsize;

			compstrategy(obp, bn);
			goto done;
		}
#endif /* VND_COMPRESSION */

		/*
		 * Allocate a header for this transfer and link it to the
		 * buffer.
		 */
		s = splbio();
		vnx = VND_GETXFER(vnd);
		splx(s);
		vnx->vx_vnd = vnd;

		s = splbio();
		while (vnd->sc_active >= vnd->sc_maxactive) {
			tsleep(&vnd->sc_tab, PRIBIO, "vndac", 0);
		}
		vnd->sc_active++;
		splx(s);

		/* Instrumentation. */
		disk_busy(&vnd->sc_dkdev);

		bp = &vnx->vx_buf;
		buf_init(bp);
		bp->b_flags = (obp->b_flags & B_READ);
		bp->b_oflags = obp->b_oflags;
		bp->b_cflags = obp->b_cflags;
		bp->b_iodone = vndiodone;
		bp->b_private = obp;
		bp->b_vp = vnd->sc_vp;
		bp->b_objlock = &bp->b_vp->v_interlock;
		bp->b_data = obp->b_data;
		bp->b_bcount = obp->b_bcount;
		BIO_COPYPRIO(bp, obp);

		/* Handle the request using the appropriate operations. */
		if (usestrategy)
			handle_with_strategy(vnd, obp, bp);
		else
			handle_with_rdwr(vnd, obp, bp);

		s = splbio();
		continue;

 done:
		biodone(obp);
		s = splbio();
	}

	vnd->sc_flags &= (~VNF_KTHREAD | VNF_VUNCONF);
	wakeup(&vnd->sc_kthread);
	splx(s);
	kthread_exit(0);
}

/*
 * Checks if the given vnode supports the requested operation.
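 * For example, vndthread() above uses
 *	vnode_has_op(vp, VOFFSET(vop_bmap))
 * to decide whether the bmap/strategy path can be used.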
 * The operation is specified by the offset returned by VOFFSET.
 *
 * XXX The test below used to determine this is quite fragile
 * because it relies on the file system to use genfs to specify
 * unimplemented operations.  There might be another way to do
 * it more cleanly.
 */
static bool
vnode_has_op(const struct vnode *vp, int opoffset)
{
	int (*defaultp)(void *);
	int (*opp)(void *);

	defaultp = vp->v_op[VOFFSET(vop_default)];
	opp = vp->v_op[opoffset];

	return opp != defaultp && opp != genfs_eopnotsupp &&
	    opp != genfs_badop && opp != genfs_nullop;
}

/*
 * Handles the read/write request given in 'bp' using the vnode's VOP_READ
 * and VOP_WRITE operations.
 *
 * 'obp' is a pointer to the original request fed to the vnd device.
 */
static void
handle_with_rdwr(struct vnd_softc *vnd, const struct buf *obp, struct buf *bp)
{
	bool doread;
	off_t offset;
	size_t resid;
	struct vnode *vp;

	doread = bp->b_flags & B_READ;
	offset = obp->b_rawblkno * vnd->sc_dkdev.dk_label->d_secsize;
	vp = vnd->sc_vp;

#if defined(DEBUG)
	if (vnddebug & VDB_IO)
		printf("vnd (rdwr): vp %p, %s, rawblkno 0x%" PRIx64
		    ", secsize %d, offset %" PRIu64
		    ", bcount %d\n",
		    vp, doread ? "read" : "write", obp->b_rawblkno,
		    vnd->sc_dkdev.dk_label->d_secsize, offset,
		    bp->b_bcount);
#endif

	/* Issue the read or write operation. */
	bp->b_error =
	    vn_rdwr(doread ? UIO_READ : UIO_WRITE,
		vp, bp->b_data, bp->b_bcount, offset,
		UIO_SYSSPACE, 0, vnd->sc_cred, &resid, NULL);
	bp->b_resid = resid;

	/* We need to increase the number of outputs on the vnode if
	 * there was any write to it. */
	if (!doread) {
		mutex_enter(&vp->v_interlock);
		vp->v_numoutput++;
		mutex_exit(&vp->v_interlock);
	}

	biodone(bp);
}

/*
 * Handles the read/write request given in 'bp' using the vnode's VOP_BMAP
 * and VOP_STRATEGY operations.
 *
 * 'obp' is a pointer to the original request fed to the vnd device.
 */
static void
handle_with_strategy(struct vnd_softc *vnd, const struct buf *obp,
    struct buf *bp)
{
	int bsize, error, flags, skipped;
	size_t resid, sz;
	off_t bn, offset;
	struct vnode *vp;

	flags = obp->b_flags;

	if (!(flags & B_READ)) {
		vp = bp->b_vp;
		mutex_enter(&vp->v_interlock);
		vp->v_numoutput++;
		mutex_exit(&vp->v_interlock);
	}

	/* Convert to a byte offset within the file. */
	bn = obp->b_rawblkno * vnd->sc_dkdev.dk_label->d_secsize;

	bsize = vnd->sc_vp->v_mount->mnt_stat.f_iosize;
	skipped = 0;

	/*
	 * Break the request into bsize pieces and feed them
	 * sequentially using VOP_BMAP/VOP_STRATEGY.
	 * We do it this way to keep from flooding NFS servers if we
	 * are connected to an NFS file.  This places the burden on
	 * the client rather than the server.
	 */
	error = 0;
	bp->b_resid = bp->b_bcount;
	for (offset = 0, resid = bp->b_resid; resid;
	    resid -= sz, offset += sz) {
		struct buf *nbp;
		daddr_t nbn;
		int off, nra;

		nra = 0;
		vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY | LK_CANRECURSE);
		error = VOP_BMAP(vnd->sc_vp, bn / bsize, &vp, &nbn, &nra);
		VOP_UNLOCK(vnd->sc_vp, 0);

		if (error == 0 && (long)nbn == -1)
			error = EIO;

		/*
		 * If there was an error or a hole in the file...punt.
		 * Note that we may have to wait for any operations
		 * that we have already fired off before releasing
		 * the buffer.
		 *
		 * XXX we could deal with holes here but it would be
		 * a hassle (in the write case).
		 */
		if (error) {
			skipped += resid;
			break;
		}

#ifdef DEBUG
		if (!dovndcluster)
			nra = 0;
#endif

		off = bn % bsize;
		sz = MIN(((off_t)1 + nra) * bsize - off, resid);
#ifdef DEBUG
		if (vnddebug & VDB_IO)
			printf("vndstrategy: vp %p/%p bn 0x%qx/0x%" PRIx64
			    " sz 0x%zx\n", vnd->sc_vp, vp, (long long)bn,
			    nbn, sz);
#endif

		nbp = getiobuf(vp, true);
		nestiobuf_setup(bp, nbp, offset, sz);
		nbp->b_blkno = nbn + btodb(off);

#if 0 /* XXX #ifdef DEBUG */
		if (vnddebug & VDB_IO)
			printf("vndstart(%ld): bp %p vp %p blkno "
			    "0x%" PRIx64 " flags %x addr %p cnt 0x%x\n",
			    (long) (vnd-vnd_softc), &nbp->vb_buf,
			    nbp->vb_buf.b_vp, nbp->vb_buf.b_blkno,
			    nbp->vb_buf.b_flags, nbp->vb_buf.b_data,
			    nbp->vb_buf.b_bcount);
#endif
		VOP_STRATEGY(vp, nbp);
		bn += sz;
	}
	nestiobuf_done(bp, skipped, error);
}

static void
vndiodone(struct buf *bp)
{
	struct vndxfer *vnx = VND_BUFTOXFER(bp);
	struct vnd_softc *vnd = vnx->vx_vnd;
	struct buf *obp = bp->b_private;

	KASSERT(&vnx->vx_buf == bp);
	KASSERT(vnd->sc_active > 0);
#ifdef DEBUG
	if (vnddebug & VDB_IO) {
		printf("vndiodone1: bp %p iodone: error %d\n",
		    bp, bp->b_error);
	}
#endif
	disk_unbusy(&vnd->sc_dkdev, bp->b_bcount - bp->b_resid,
	    (bp->b_flags & B_READ));
	vnd->sc_active--;
	if (vnd->sc_active == 0) {
		wakeup(&vnd->sc_tab);
	}
	obp->b_error = bp->b_error;
	obp->b_resid = bp->b_resid;
	VND_PUTXFER(vnd, vnx);
	biodone(obp);
}

/* ARGSUSED */
static int
vndread(dev_t dev, struct uio *uio, int flags)
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndread(0x%x, %p)\n", dev, uio);
#endif

	sc = device_lookup(&vnd_cd, unit);
	if (sc == NULL)
		return ENXIO;

	if ((sc->sc_flags & VNF_INITED) == 0)
		return (ENXIO);

	return (physio(vndstrategy, NULL, dev, B_READ, minphys, uio));
}

/* ARGSUSED */
static int
vndwrite(dev_t dev, struct uio *uio, int flags)
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndwrite(0x%x, %p)\n", dev, uio);
#endif

	sc = device_lookup(&vnd_cd, unit);
	if (sc == NULL)
		return ENXIO;

	if ((sc->sc_flags & VNF_INITED) == 0)
		return (ENXIO);

	return (physio(vndstrategy, NULL, dev, B_WRITE, minphys, uio));
}

static int
vnd_cget(struct lwp *l, int unit, int *un, struct vattr *va)
{
	struct vnd_softc *vnd;

	if (*un == -1)
		*un = unit;
	if (*un < 0)
		return EINVAL;

	vnd = device_lookup(&vnd_cd, *un);
	if (vnd == NULL)
		return (*un >= vnd_cd.cd_ndevs) ?
		    ENXIO : -1;

	if ((vnd->sc_flags & VNF_INITED) == 0)
		return -1;

	return VOP_GETATTR(vnd->sc_vp, va, l->l_cred);
}

/* ARGSUSED */
static int
vndioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
{
	int unit = vndunit(dev);
	struct vnd_softc *vnd;
	struct vnd_ioctl *vio;
	struct vattr vattr;
	struct nameidata nd;
	int error, part, pmask;
	size_t geomsize;
	int fflags;
#ifdef __HAVE_OLD_DISKLABEL
	struct disklabel newlabel;
#endif

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndioctl(0x%x, 0x%lx, %p, 0x%x, %p): unit %d\n",
		    dev, cmd, data, flag, l->l_proc, unit);
#endif
	vnd = device_lookup(&vnd_cd, unit);
	if (vnd == NULL &&
#ifdef COMPAT_30
	    cmd != VNDIOOCGET &&
#endif
	    cmd != VNDIOCGET)
		return ENXIO;
	vio = (struct vnd_ioctl *)data;

	/* Must be open for writes for these commands... */
	switch (cmd) {
	case VNDIOCSET:
	case VNDIOCCLR:
	case DIOCSDINFO:
	case DIOCWDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCSDINFO:
	case ODIOCWDINFO:
#endif
	case DIOCKLABEL:
	case DIOCWLABEL:
		if ((flag & FWRITE) == 0)
			return (EBADF);
	}

	/* Must be initialized for these... */
	switch (cmd) {
	case VNDIOCCLR:
	case DIOCGDINFO:
	case DIOCSDINFO:
	case DIOCWDINFO:
	case DIOCGPART:
	case DIOCKLABEL:
	case DIOCWLABEL:
	case DIOCGDEFLABEL:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDINFO:
	case ODIOCSDINFO:
	case ODIOCWDINFO:
	case ODIOCGDEFLABEL:
#endif
		if ((vnd->sc_flags & VNF_INITED) == 0)
			return (ENXIO);
	}

	switch (cmd) {
	case VNDIOCSET:
		if (vnd->sc_flags & VNF_INITED)
			return (EBUSY);

		if ((error = vndlock(vnd)) != 0)
			return (error);

		fflags = FREAD;
		if ((vio->vnd_flags & VNDIOF_READONLY) == 0)
			fflags |= FWRITE;
		NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, vio->vnd_file);
		if ((error = vn_open(&nd, fflags, 0)) != 0)
			goto unlock_and_exit;
		KASSERT(l);
		error = VOP_GETATTR(nd.ni_vp, &vattr, l->l_cred);
		if (!error && nd.ni_vp->v_type != VREG)
			error = EOPNOTSUPP;
		if (error) {
			VOP_UNLOCK(nd.ni_vp, 0);
			goto close_and_exit;
		}

		/* If using a compressed file, initialize its info */
		/* (or abort with an error if kernel has no compression) */
		if (vio->vnd_flags & VNF_COMP) {
#ifdef VND_COMPRESSION
			struct vnd_comp_header *ch;
			int i;
			u_int32_t comp_size;
			u_int32_t comp_maxsize;

			/* allocate space for compressed file header */
			ch = malloc(sizeof(struct vnd_comp_header),
			    M_TEMP, M_WAITOK);

			/* read compressed file header */
			error = vn_rdwr(UIO_READ, nd.ni_vp, (void *)ch,
			    sizeof(struct vnd_comp_header), 0, UIO_SYSSPACE,
			    IO_UNIT|IO_NODELOCKED, l->l_cred, NULL, NULL);
			if(error) {
				free(ch, M_TEMP);
				VOP_UNLOCK(nd.ni_vp, 0);
				goto close_and_exit;
			}

			/* save some header info */
			vnd->sc_comp_blksz = ntohl(ch->block_size);
			/* note last offset is the file byte size */
			vnd->sc_comp_numoffs = ntohl(ch->num_blocks)+1;
			free(ch, M_TEMP);
			if (vnd->sc_comp_blksz == 0 ||
			    vnd->sc_comp_blksz % DEV_BSIZE != 0) {
				VOP_UNLOCK(nd.ni_vp, 0);
				error = EINVAL;
				goto close_and_exit;
			}
			if(sizeof(struct vnd_comp_header) +
			  sizeof(u_int64_t) * vnd->sc_comp_numoffs >
			  vattr.va_size) {
				VOP_UNLOCK(nd.ni_vp, 0);
				error = EINVAL;
				goto close_and_exit;
			}

			/* set decompressed file size */
			vattr.va_size =
			    ((u_quad_t)vnd->sc_comp_numoffs - 1) *
			    (u_quad_t)vnd->sc_comp_blksz;

			/* allocate space for all the compressed offsets */
			vnd->sc_comp_offsets =
			    malloc(sizeof(u_int64_t) * vnd->sc_comp_numoffs,
			    M_DEVBUF, M_WAITOK);

			/* read in the offsets */
			error = vn_rdwr(UIO_READ, nd.ni_vp,
			    (void *)vnd->sc_comp_offsets,
			    sizeof(u_int64_t) * vnd->sc_comp_numoffs,
			    sizeof(struct vnd_comp_header), UIO_SYSSPACE,
			    IO_UNIT|IO_NODELOCKED, l->l_cred, NULL, NULL);
			if(error) {
				VOP_UNLOCK(nd.ni_vp, 0);
				goto close_and_exit;
			}
			/*
			 * find largest block size (used for allocation limit).
			 * Also convert offset to native byte order.
			 */
			comp_maxsize = 0;
			for (i = 0; i < vnd->sc_comp_numoffs - 1; i++) {
				vnd->sc_comp_offsets[i] =
				    be64toh(vnd->sc_comp_offsets[i]);
				comp_size = be64toh(vnd->sc_comp_offsets[i + 1])
				    - vnd->sc_comp_offsets[i];
				if (comp_size > comp_maxsize)
					comp_maxsize = comp_size;
			}
			vnd->sc_comp_offsets[vnd->sc_comp_numoffs - 1] =
			    be64toh(vnd->sc_comp_offsets[vnd->sc_comp_numoffs - 1]);

			/* create compressed data buffer */
			vnd->sc_comp_buff = malloc(comp_maxsize,
			    M_DEVBUF, M_WAITOK);

			/* create decompressed buffer */
			vnd->sc_comp_decombuf = malloc(vnd->sc_comp_blksz,
			    M_DEVBUF, M_WAITOK);
			vnd->sc_comp_buffblk = -1;

			/* Initialize decompress stream */
			bzero(&vnd->sc_comp_stream, sizeof(z_stream));
			vnd->sc_comp_stream.zalloc = vnd_alloc;
			vnd->sc_comp_stream.zfree = vnd_free;
			error = inflateInit2(&vnd->sc_comp_stream, MAX_WBITS);
			if(error) {
				if(vnd->sc_comp_stream.msg)
					printf("vnd%d: compressed file, %s\n",
					    unit, vnd->sc_comp_stream.msg);
				VOP_UNLOCK(nd.ni_vp, 0);
				error = EINVAL;
				goto close_and_exit;
			}

			vnd->sc_flags |= VNF_COMP | VNF_READONLY;
#else /* !VND_COMPRESSION */
			VOP_UNLOCK(nd.ni_vp, 0);
			error = EOPNOTSUPP;
			goto close_and_exit;
#endif /* VND_COMPRESSION */
		}

		VOP_UNLOCK(nd.ni_vp, 0);
		vnd->sc_vp = nd.ni_vp;
		vnd->sc_size = btodb(vattr.va_size);	/* note truncation */

		/*
		 * Use pseudo-geometry specified.  If none was provided,
		 * use "standard" Adaptec fictitious geometry.
		 */
		if (vio->vnd_flags & VNDIOF_HASGEOM) {

			memcpy(&vnd->sc_geom, &vio->vnd_geom,
			    sizeof(vio->vnd_geom));

			/*
			 * Sanity-check the sector size.
			 * XXX Don't allow secsize < DEV_BSIZE.  Should
			 * XXX we?
			 */
			if (vnd->sc_geom.vng_secsize < DEV_BSIZE ||
			    (vnd->sc_geom.vng_secsize % DEV_BSIZE) != 0 ||
			    vnd->sc_geom.vng_ncylinders == 0 ||
			    (vnd->sc_geom.vng_ntracks *
			     vnd->sc_geom.vng_nsectors) == 0) {
				error = EINVAL;
				goto close_and_exit;
			}

			/*
			 * Compute the size (in DEV_BSIZE blocks) specified
			 * by the geometry.
			 */
			geomsize = (vnd->sc_geom.vng_nsectors *
			    vnd->sc_geom.vng_ntracks *
			    vnd->sc_geom.vng_ncylinders) *
			    (vnd->sc_geom.vng_secsize / DEV_BSIZE);

			/*
			 * Sanity-check the size against the specified
			 * geometry.
			 */
			if (vnd->sc_size < geomsize) {
				error = EINVAL;
				goto close_and_exit;
			}
		} else if (vnd->sc_size >= (32 * 64)) {
			/*
			 * Size must be at least 2048 DEV_BSIZE blocks
			 * (1M) in order to use this geometry.
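			 * 32 sectors/track * 64 tracks/cylinder at 512-byte
			 * DEV_BSIZE blocks is 1 MB per cylinder, so
			 * vng_ncylinders below is simply sc_size / 2048.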
			 */
			vnd->sc_geom.vng_secsize = DEV_BSIZE;
			vnd->sc_geom.vng_nsectors = 32;
			vnd->sc_geom.vng_ntracks = 64;
			vnd->sc_geom.vng_ncylinders = vnd->sc_size / (64 * 32);
		} else {
			vnd->sc_geom.vng_secsize = DEV_BSIZE;
			vnd->sc_geom.vng_nsectors = 1;
			vnd->sc_geom.vng_ntracks = 1;
			vnd->sc_geom.vng_ncylinders = vnd->sc_size;
		}

		vnd_set_properties(vnd);

		if (vio->vnd_flags & VNDIOF_READONLY) {
			vnd->sc_flags |= VNF_READONLY;
		}

		if ((error = vndsetcred(vnd, l->l_cred)) != 0)
			goto close_and_exit;

		vndthrottle(vnd, vnd->sc_vp);
		vio->vnd_size = dbtob(vnd->sc_size);
		vnd->sc_flags |= VNF_INITED;

		/* create the kernel thread, wait for it to be up */
		error = kthread_create(PRI_NONE, 0, NULL, vndthread, vnd,
		    &vnd->sc_kthread, vnd->sc_dev.dv_xname);
		if (error)
			goto close_and_exit;
		while ((vnd->sc_flags & VNF_KTHREAD) == 0) {
			tsleep(&vnd->sc_kthread, PRIBIO, "vndthr", 0);
		}
#ifdef DEBUG
		if (vnddebug & VDB_INIT)
			printf("vndioctl: SET vp %p size 0x%lx %d/%d/%d/%d\n",
			    vnd->sc_vp, (unsigned long) vnd->sc_size,
			    vnd->sc_geom.vng_secsize,
			    vnd->sc_geom.vng_nsectors,
			    vnd->sc_geom.vng_ntracks,
			    vnd->sc_geom.vng_ncylinders);
#endif

		/* Attach the disk. */
		disk_attach(&vnd->sc_dkdev);

		/* Initialize the xfer and buffer pools. */
		pool_init(&vnd->sc_vxpool, sizeof(struct vndxfer), 0,
		    0, 0, "vndxpl", NULL, IPL_BIO);

		/* Try and read the disklabel. */
		vndgetdisklabel(dev, vnd);

		vndunlock(vnd);

		break;

 close_and_exit:
		(void) vn_close(nd.ni_vp, fflags, l->l_cred, l);
 unlock_and_exit:
#ifdef VND_COMPRESSION
		/* free any allocated memory (for compressed file) */
		if(vnd->sc_comp_offsets) {
			free(vnd->sc_comp_offsets, M_DEVBUF);
			vnd->sc_comp_offsets = NULL;
		}
		if(vnd->sc_comp_buff) {
			free(vnd->sc_comp_buff, M_DEVBUF);
			vnd->sc_comp_buff = NULL;
		}
		if(vnd->sc_comp_decombuf) {
			free(vnd->sc_comp_decombuf, M_DEVBUF);
			vnd->sc_comp_decombuf = NULL;
		}
#endif /* VND_COMPRESSION */
		vndunlock(vnd);
		return (error);

	case VNDIOCCLR:
		if ((error = vndlock(vnd)) != 0)
			return (error);

		/*
		 * Don't unconfigure if any other partitions are open
		 * or if both the character and block flavors of this
		 * partition are open.
		 */
		part = DISKPART(dev);
		pmask = (1 << part);
		if (((vnd->sc_dkdev.dk_openmask & ~pmask) ||
		    ((vnd->sc_dkdev.dk_bopenmask & pmask) &&
		    (vnd->sc_dkdev.dk_copenmask & pmask))) &&
		    !(vio->vnd_flags & VNDIOF_FORCE)) {
			vndunlock(vnd);
			return (EBUSY);
		}

		/*
		 * XXX vndclear() might call vndclose() implicitly;
		 * release lock to avoid recursion
		 */
		vndunlock(vnd);
		vndclear(vnd, minor(dev));
#ifdef DEBUG
		if (vnddebug & VDB_INIT)
			printf("vndioctl: CLRed\n");
#endif

		/* Destroy the xfer and buffer pools. */
		pool_destroy(&vnd->sc_vxpool);

		/* Detach the disk. */
		disk_detach(&vnd->sc_dkdev);
		break;

#ifdef COMPAT_30
	case VNDIOOCGET: {
		struct vnd_ouser *vnu;
		struct vattr va;
		vnu = (struct vnd_ouser *)data;
		KASSERT(l);
		switch (error = vnd_cget(l, unit, &vnu->vnu_unit, &va)) {
		case 0:
			vnu->vnu_dev = va.va_fsid;
			vnu->vnu_ino = va.va_fileid;
			break;
		case -1:
			/* unused is not an error */
			vnu->vnu_dev = 0;
			vnu->vnu_ino = 0;
			break;
		default:
			return error;
		}
		break;
	}
#endif
	case VNDIOCGET: {
		struct vnd_user *vnu;
		struct vattr va;
		vnu = (struct vnd_user *)data;
		KASSERT(l);
		switch (error = vnd_cget(l, unit, &vnu->vnu_unit, &va)) {
		case 0:
			vnu->vnu_dev = va.va_fsid;
			vnu->vnu_ino = va.va_fileid;
			break;
		case -1:
			/* unused is not an error */
			vnu->vnu_dev = 0;
			vnu->vnu_ino = 0;
			break;
		default:
			return error;
		}
		break;
	}

	case DIOCGDINFO:
		*(struct disklabel *)data = *(vnd->sc_dkdev.dk_label);
		break;

#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDINFO:
		newlabel = *(vnd->sc_dkdev.dk_label);
		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
			return ENOTTY;
		memcpy(data, &newlabel, sizeof (struct olddisklabel));
		break;
#endif

	case DIOCGPART:
		((struct partinfo *)data)->disklab = vnd->sc_dkdev.dk_label;
		((struct partinfo *)data)->part =
		    &vnd->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
		break;

	case DIOCWDINFO:
	case DIOCSDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCWDINFO:
	case ODIOCSDINFO:
#endif
	{
		struct disklabel *lp;

		if ((error = vndlock(vnd)) != 0)
			return (error);

		vnd->sc_flags |= VNF_LABELLING;

#ifdef __HAVE_OLD_DISKLABEL
		if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
			memset(&newlabel, 0, sizeof newlabel);
			memcpy(&newlabel, data, sizeof (struct olddisklabel));
			lp = &newlabel;
		} else
#endif
		lp = (struct disklabel *)data;

		error = setdisklabel(vnd->sc_dkdev.dk_label,
		    lp, 0, vnd->sc_dkdev.dk_cpulabel);
		if (error == 0) {
			if (cmd == DIOCWDINFO
#ifdef __HAVE_OLD_DISKLABEL
			    || cmd == ODIOCWDINFO
#endif
			   )
				error = writedisklabel(VNDLABELDEV(dev),
				    vndstrategy, vnd->sc_dkdev.dk_label,
				    vnd->sc_dkdev.dk_cpulabel);
		}

		vnd->sc_flags &= ~VNF_LABELLING;

		vndunlock(vnd);

		if (error)
			return (error);
		break;
	}

	case DIOCKLABEL:
		if (*(int *)data != 0)
			vnd->sc_flags |= VNF_KLABEL;
		else
			vnd->sc_flags &= ~VNF_KLABEL;
		break;

	case DIOCWLABEL:
		if (*(int *)data != 0)
			vnd->sc_flags |= VNF_WLABEL;
		else
			vnd->sc_flags &= ~VNF_WLABEL;
		break;

	case DIOCGDEFLABEL:
		vndgetdefaultlabel(vnd, (struct disklabel *)data);
		break;

#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDEFLABEL:
		vndgetdefaultlabel(vnd, &newlabel);
		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
			return ENOTTY;
		memcpy(data, &newlabel, sizeof (struct olddisklabel));
		break;
#endif

	default:
		return (ENOTTY);
	}

	return (0);
}

/*
 * Duplicate the current process's credentials.  Since we are called only
 * as the result of a SET ioctl and only root can do that, any future access
 * to this "disk" is essentially as root.
 * Note that credentials may change
 * if some other uid can write directly to the mapped file (NFS).
 */
static int
vndsetcred(struct vnd_softc *vnd, kauth_cred_t cred)
{
	struct uio auio;
	struct iovec aiov;
	char *tmpbuf;
	int error;

	vnd->sc_cred = kauth_cred_dup(cred);
	tmpbuf = malloc(DEV_BSIZE, M_TEMP, M_WAITOK);

	/* XXX: Horrible kludge to establish credentials for NFS */
	aiov.iov_base = tmpbuf;
	aiov.iov_len = min(DEV_BSIZE, dbtob(vnd->sc_size));
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = 0;
	auio.uio_rw = UIO_READ;
	auio.uio_resid = aiov.iov_len;
	UIO_SETUP_SYSSPACE(&auio);
	vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY);
	error = VOP_READ(vnd->sc_vp, &auio, 0, vnd->sc_cred);
	if (error == 0) {
		/*
		 * Because vnd does all IO directly through the vnode
		 * we need to flush (at least) the buffer from the above
		 * VOP_READ from the buffer cache to prevent cache
		 * incoherencies.  Also, be careful to write dirty
		 * buffers back to stable storage.
		 */
		error = vinvalbuf(vnd->sc_vp, V_SAVE, vnd->sc_cred,
		    curlwp, 0, 0);
	}
	VOP_UNLOCK(vnd->sc_vp, 0);

	free(tmpbuf, M_TEMP);
	return (error);
}

/*
 * Set maxactive based on FS type
 */
static void
vndthrottle(struct vnd_softc *vnd, struct vnode *vp)
{
#ifdef NFS
	extern int (**nfsv2_vnodeop_p)(void *);

	if (vp->v_op == nfsv2_vnodeop_p)
		vnd->sc_maxactive = 2;
	else
#endif
		vnd->sc_maxactive = 8;

	if (vnd->sc_maxactive < 1)
		vnd->sc_maxactive = 1;
}

#if 0
static void
vndshutdown(void)
{
	struct vnd_softc *vnd;

	for (vnd = &vnd_softc[0]; vnd < &vnd_softc[numvnd]; vnd++)
		if (vnd->sc_flags & VNF_INITED)
			vndclear(vnd);
}
#endif

static void
vndclear(struct vnd_softc *vnd, int myminor)
{
	struct vnode *vp = vnd->sc_vp;
	struct lwp *l = curlwp;
	int fflags = FREAD;
	int bmaj, cmaj, i, mn;
	int s;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndclear(%p): vp %p\n", vnd, vp);
#endif
	/* locate the major number */
	bmaj = bdevsw_lookup_major(&vnd_bdevsw);
	cmaj = cdevsw_lookup_major(&vnd_cdevsw);

	/* Nuke the vnodes for any open instances */
	for (i = 0; i < MAXPARTITIONS; i++) {
		mn = DISKMINOR(device_unit(&vnd->sc_dev), i);
		vdevgone(bmaj, mn, mn, VBLK);
		if (mn != myminor) /* XXX avoid killing own vnode */
			vdevgone(cmaj, mn, mn, VCHR);
	}

	if ((vnd->sc_flags & VNF_READONLY) == 0)
		fflags |= FWRITE;

	s = splbio();
	bufq_drain(vnd->sc_tab);
	splx(s);

	vnd->sc_flags |= VNF_VUNCONF;
	wakeup(&vnd->sc_tab);
	while (vnd->sc_flags & VNF_KTHREAD)
		tsleep(&vnd->sc_kthread, PRIBIO, "vnthr", 0);

#ifdef VND_COMPRESSION
	/* free the compressed file buffers */
	if(vnd->sc_flags & VNF_COMP) {
		if(vnd->sc_comp_offsets) {
			free(vnd->sc_comp_offsets, M_DEVBUF);
			vnd->sc_comp_offsets = NULL;
		}
		if(vnd->sc_comp_buff) {
			free(vnd->sc_comp_buff, M_DEVBUF);
			vnd->sc_comp_buff = NULL;
		}
		if(vnd->sc_comp_decombuf) {
			free(vnd->sc_comp_decombuf, M_DEVBUF);
			vnd->sc_comp_decombuf = NULL;
		}
	}
#endif /* VND_COMPRESSION */
	vnd->sc_flags &=
	    ~(VNF_INITED | VNF_READONLY | VNF_VLABEL | VNF_VUNCONF | VNF_COMP);
	if (vp == (struct vnode *)0)
		panic("vndclear: null vp");
	(void) vn_close(vp, fflags, vnd->sc_cred, l);
	kauth_cred_free(vnd->sc_cred);
	vnd->sc_vp = (struct vnode *)0;
	vnd->sc_cred = (kauth_cred_t)0;
	vnd->sc_size = 0;
}

static int
vndsize(dev_t dev)
{
	struct vnd_softc *sc;
	struct disklabel *lp;
	int part, unit, omask;
	int size;

	unit = vndunit(dev);
	sc = (struct vnd_softc *)device_lookup(&vnd_cd, unit);
	if (sc == NULL)
		return -1;

	if ((sc->sc_flags & VNF_INITED) == 0)
		return (-1);

	part = DISKPART(dev);
	omask = sc->sc_dkdev.dk_openmask & (1 << part);
	lp = sc->sc_dkdev.dk_label;

	if (omask == 0 && vndopen(dev, 0, S_IFBLK, curlwp))	/* XXX */
		return (-1);

	if (lp->d_partitions[part].p_fstype != FS_SWAP)
		size = -1;
	else
		size = lp->d_partitions[part].p_size *
		    (lp->d_secsize / DEV_BSIZE);

	if (omask == 0 && vndclose(dev, 0, S_IFBLK, curlwp))	/* XXX */
		return (-1);

	return (size);
}

static int
vnddump(dev_t dev, daddr_t blkno, void *va,
    size_t size)
{

	/* Not implemented. */
	return ENXIO;
}

static void
vndgetdefaultlabel(struct vnd_softc *sc, struct disklabel *lp)
{
	struct vndgeom *vng = &sc->sc_geom;
	struct partition *pp;

	memset(lp, 0, sizeof(*lp));

	lp->d_secperunit = sc->sc_size / (vng->vng_secsize / DEV_BSIZE);
	lp->d_secsize = vng->vng_secsize;
	lp->d_nsectors = vng->vng_nsectors;
	lp->d_ntracks = vng->vng_ntracks;
	lp->d_ncylinders = vng->vng_ncylinders;
	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;

	strncpy(lp->d_typename, "vnd", sizeof(lp->d_typename));
	lp->d_type = DTYPE_VND;
	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
	lp->d_rpm = 3600;
	lp->d_interleave = 1;
	lp->d_flags = 0;

	pp = &lp->d_partitions[RAW_PART];
	pp->p_offset = 0;
	pp->p_size = lp->d_secperunit;
	pp->p_fstype = FS_UNUSED;
	lp->d_npartitions = RAW_PART + 1;

	lp->d_magic = DISKMAGIC;
	lp->d_magic2 = DISKMAGIC;
	lp->d_checksum = dkcksum(lp);
}

/*
 * Read the disklabel from a vnd.  If one is not present, create a fake one.
 */
static void
vndgetdisklabel(dev_t dev, struct vnd_softc *sc)
{
	const char *errstring;
	struct disklabel *lp = sc->sc_dkdev.dk_label;
	struct cpu_disklabel *clp = sc->sc_dkdev.dk_cpulabel;
	int i;

	memset(clp, 0, sizeof(*clp));

	vndgetdefaultlabel(sc, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(VNDLABELDEV(dev), vndstrategy, lp, clp);
	if (errstring) {
		/*
		 * Lack of disklabel is common, but we print the warning
		 * anyway, since it might contain other useful information.
		 */
		printf("%s: %s\n", sc->sc_dev.dv_xname, errstring);

		/*
		 * For historical reasons, if there's no disklabel
		 * present, all partitions must be FS_BSDFFS and
		 * occupy the entire disk.
		 */
		for (i = 0; i < MAXPARTITIONS; i++) {
			/*
			 * Don't wipe out port specific hack (such as
			 * dos partition hack of i386 port).
			 */
			if (lp->d_partitions[i].p_size != 0)
				continue;

			lp->d_partitions[i].p_size = lp->d_secperunit;
			lp->d_partitions[i].p_offset = 0;
			lp->d_partitions[i].p_fstype = FS_BSDFFS;
		}

		strncpy(lp->d_packname, "default label",
		    sizeof(lp->d_packname));

		lp->d_npartitions = MAXPARTITIONS;
		lp->d_checksum = dkcksum(lp);
	}

	/* In-core label now valid. */
	sc->sc_flags |= VNF_VLABEL;
}

/*
 * Wait interruptibly for an exclusive lock.
 *
 * XXX
 * Several drivers do this; it should be abstracted and made MP-safe.
 */
static int
vndlock(struct vnd_softc *sc)
{
	int error;

	while ((sc->sc_flags & VNF_LOCKED) != 0) {
		sc->sc_flags |= VNF_WANTED;
		if ((error = tsleep(sc, PRIBIO | PCATCH, "vndlck", 0)) != 0)
			return (error);
	}
	sc->sc_flags |= VNF_LOCKED;
	return (0);
}

/*
 * Unlock and wake up any waiters.
 */
static void
vndunlock(struct vnd_softc *sc)
{

	sc->sc_flags &= ~VNF_LOCKED;
	if ((sc->sc_flags & VNF_WANTED) != 0) {
		sc->sc_flags &= ~VNF_WANTED;
		wakeup(sc);
	}
}

#ifdef VND_COMPRESSION
/* compressed file read */
static void
compstrategy(struct buf *bp, off_t bn)
{
	int error;
	int unit = vndunit(bp->b_dev);
	struct vnd_softc *vnd =
	    (struct vnd_softc *)device_lookup(&vnd_cd, unit);
	u_int32_t comp_block;
	struct uio auio;
	char *addr;
	int s;

	/* set up constants for data move */
	auio.uio_rw = UIO_READ;
	UIO_SETUP_SYSSPACE(&auio);

	/* read, and transfer the data */
	addr = bp->b_data;
	bp->b_resid = bp->b_bcount;
	s = splbio();
	while (bp->b_resid > 0) {
		unsigned length;
		size_t length_in_buffer;
		u_int32_t offset_in_buffer;
		struct iovec aiov;

		/* calculate the compressed block number */
		comp_block = bn / (off_t)vnd->sc_comp_blksz;

		/* check for good block number */
		if (comp_block >= vnd->sc_comp_numoffs) {
			bp->b_error = EINVAL;
			splx(s);
			return;
		}

		/* read in the compressed block, if not in buffer */
		if (comp_block != vnd->sc_comp_buffblk) {
			length = vnd->sc_comp_offsets[comp_block + 1] -
			    vnd->sc_comp_offsets[comp_block];
			vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY);
			error = vn_rdwr(UIO_READ, vnd->sc_vp, vnd->sc_comp_buff,
			    length, vnd->sc_comp_offsets[comp_block],
			    UIO_SYSSPACE, IO_UNIT, vnd->sc_cred, NULL, NULL);
			if (error) {
				bp->b_error = error;
				VOP_UNLOCK(vnd->sc_vp, 0);
				splx(s);
				return;
			}
			/* uncompress the buffer */
			vnd->sc_comp_stream.next_in = vnd->sc_comp_buff;
			vnd->sc_comp_stream.avail_in = length;
			vnd->sc_comp_stream.next_out = vnd->sc_comp_decombuf;
			vnd->sc_comp_stream.avail_out = vnd->sc_comp_blksz;
			inflateReset(&vnd->sc_comp_stream);
			error = inflate(&vnd->sc_comp_stream, Z_FINISH);
			if (error != Z_STREAM_END) {
				if (vnd->sc_comp_stream.msg)
					printf("%s: compressed file, %s\n",
					    vnd->sc_dev.dv_xname,
					    vnd->sc_comp_stream.msg);
				bp->b_error = EBADMSG;
				VOP_UNLOCK(vnd->sc_vp, 0);
				splx(s);
				return;
			}
			vnd->sc_comp_buffblk = comp_block;
			VOP_UNLOCK(vnd->sc_vp, 0);
		}

		/* transfer the usable uncompressed data */
		offset_in_buffer = bn % (off_t)vnd->sc_comp_blksz;
		length_in_buffer = vnd->sc_comp_blksz - offset_in_buffer;
		if (length_in_buffer > bp->b_resid)
			length_in_buffer = bp->b_resid;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		aiov.iov_base = addr;
		aiov.iov_len = length_in_buffer;
		auio.uio_resid = aiov.iov_len;
		auio.uio_offset = 0;
		error = uiomove(vnd->sc_comp_decombuf + offset_in_buffer,
		    length_in_buffer, &auio);
		if (error) {
			bp->b_error = error;
			splx(s);
			return;
		}

		bn += length_in_buffer;
		addr += length_in_buffer;
		bp->b_resid -= length_in_buffer;
	}
	splx(s);
}

/* compression memory allocation routines */
static void *
vnd_alloc(void *aux, u_int items, u_int siz)
{
	return malloc(items * siz, M_TEMP, M_NOWAIT);
}

static void
vnd_free(void *aux, void *ptr)
{
	free(ptr, M_TEMP);
}
#endif /* VND_COMPRESSION */

static void
vnd_set_properties(struct vnd_softc *vnd)
{
	prop_dictionary_t disk_info, odisk_info, geom;

	disk_info = prop_dictionary_create();

	geom = prop_dictionary_create();

	prop_dictionary_set_uint64(geom, "sectors-per-unit",
	    vnd->sc_geom.vng_nsectors * vnd->sc_geom.vng_ntracks *
	    vnd->sc_geom.vng_ncylinders);

	prop_dictionary_set_uint32(geom, "sector-size",
	    vnd->sc_geom.vng_secsize);

	prop_dictionary_set_uint16(geom, "sectors-per-track",
	    vnd->sc_geom.vng_nsectors);

	prop_dictionary_set_uint16(geom, "tracks-per-cylinder",
	    vnd->sc_geom.vng_ntracks);

	prop_dictionary_set_uint64(geom, "cylinders-per-unit",
	    vnd->sc_geom.vng_ncylinders);

	prop_dictionary_set(disk_info, "geometry", geom);
	prop_object_release(geom);

	prop_dictionary_set(device_properties(&vnd->sc_dev),
	    "disk-info", disk_info);

	/*
	 * Don't release disk_info here; we keep a reference to it.
	 * disk_detach() will release it when we go away.
	 */

	odisk_info = vnd->sc_dkdev.dk_info;
	vnd->sc_dkdev.dk_info = disk_info;
	if (odisk_info)
		prop_object_release(odisk_info);
}