1 /* $NetBSD: vnd.c,v 1.181 2008/05/05 13:41:30 ad Exp $ */ 2 3 /*- 4 * Copyright (c) 1996, 1997, 1998, 2008 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1990, 1993 34 * The Regents of the University of California. All rights reserved. 35 * 36 * This code is derived from software contributed to Berkeley by 37 * the Systems Programming Group of the University of Utah Computer 38 * Science Department. 39 * 40 * Redistribution and use in source and binary forms, with or without 41 * modification, are permitted provided that the following conditions 42 * are met: 43 * 1. Redistributions of source code must retain the above copyright 44 * notice, this list of conditions and the following disclaimer. 45 * 2. Redistributions in binary form must reproduce the above copyright 46 * notice, this list of conditions and the following disclaimer in the 47 * documentation and/or other materials provided with the distribution. 48 * 3. Neither the name of the University nor the names of its contributors 49 * may be used to endorse or promote products derived from this software 50 * without specific prior written permission. 51 * 52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 62 * SUCH DAMAGE. 63 * 64 * from: Utah $Hdr: vn.c 1.13 94/04/02$ 65 * 66 * @(#)vn.c 8.9 (Berkeley) 5/14/95 67 */ 68 69 /* 70 * Copyright (c) 1988 University of Utah. 71 * 72 * This code is derived from software contributed to Berkeley by 73 * the Systems Programming Group of the University of Utah Computer 74 * Science Department. 75 * 76 * Redistribution and use in source and binary forms, with or without 77 * modification, are permitted provided that the following conditions 78 * are met: 79 * 1. Redistributions of source code must retain the above copyright 80 * notice, this list of conditions and the following disclaimer. 81 * 2. Redistributions in binary form must reproduce the above copyright 82 * notice, this list of conditions and the following disclaimer in the 83 * documentation and/or other materials provided with the distribution. 84 * 3. All advertising materials mentioning features or use of this software 85 * must display the following acknowledgement: 86 * This product includes software developed by the University of 87 * California, Berkeley and its contributors. 88 * 4. Neither the name of the University nor the names of its contributors 89 * may be used to endorse or promote products derived from this software 90 * without specific prior written permission. 91 * 92 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 93 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 95 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 96 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 97 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 98 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 99 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 100 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 101 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 102 * SUCH DAMAGE. 103 * 104 * from: Utah $Hdr: vn.c 1.13 94/04/02$ 105 * 106 * @(#)vn.c 8.9 (Berkeley) 5/14/95 107 */ 108 109 /* 110 * Vnode disk driver. 111 * 112 * Block/character interface to a vnode. Allows one to treat a file 113 * as a disk (e.g. build a filesystem in it, mount it, etc.). 114 * 115 * NOTE 1: If the vnode supports the VOP_BMAP and VOP_STRATEGY operations, 116 * this uses them to avoid distorting the local buffer cache. If those 117 * block-level operations are not available, this falls back to the regular 118 * read and write calls. Using these may distort the cache in some cases 119 * but better have the driver working than preventing it to work on file 120 * systems where the block-level operations are not implemented for 121 * whatever reason. 122 * 123 * NOTE 2: There is a security issue involved with this driver. 124 * Once mounted all access to the contents of the "mapped" file via 125 * the special file is controlled by the permissions on the special 126 * file, the protection of the mapped file is ignored (effectively, 127 * by using root credentials in all transactions). 128 * 129 * NOTE 3: Doesn't interact with leases, should it? 130 */ 131 132 #include <sys/cdefs.h> 133 __KERNEL_RCSID(0, "$NetBSD: vnd.c,v 1.181 2008/05/05 13:41:30 ad Exp $"); 134 135 #if defined(_KERNEL_OPT) 136 #include "fs_nfs.h" 137 #include "opt_vnd.h" 138 #endif 139 140 #include <sys/param.h> 141 #include <sys/systm.h> 142 #include <sys/namei.h> 143 #include <sys/proc.h> 144 #include <sys/kthread.h> 145 #include <sys/errno.h> 146 #include <sys/buf.h> 147 #include <sys/bufq.h> 148 #include <sys/malloc.h> 149 #include <sys/ioctl.h> 150 #include <sys/disklabel.h> 151 #include <sys/device.h> 152 #include <sys/disk.h> 153 #include <sys/stat.h> 154 #include <sys/mount.h> 155 #include <sys/vnode.h> 156 #include <sys/file.h> 157 #include <sys/uio.h> 158 #include <sys/conf.h> 159 #include <sys/kauth.h> 160 161 #include <net/zlib.h> 162 163 #include <miscfs/genfs/genfs.h> 164 #include <miscfs/specfs/specdev.h> 165 166 #include <dev/vndvar.h> 167 168 #include <prop/proplib.h> 169 170 #if defined(VNDDEBUG) && !defined(DEBUG) 171 #define DEBUG 172 #endif 173 174 #ifdef DEBUG 175 int dovndcluster = 1; 176 #define VDB_FOLLOW 0x01 177 #define VDB_INIT 0x02 178 #define VDB_IO 0x04 179 #define VDB_LABEL 0x08 180 int vnddebug = 0x00; 181 #endif 182 183 #define vndunit(x) DISKUNIT(x) 184 185 struct vndxfer { 186 struct buf vx_buf; 187 struct vnd_softc *vx_vnd; 188 }; 189 #define VND_BUFTOXFER(bp) ((struct vndxfer *)(void *)bp) 190 191 #define VND_GETXFER(vnd) pool_get(&(vnd)->sc_vxpool, PR_WAITOK) 192 #define VND_PUTXFER(vnd, vx) pool_put(&(vnd)->sc_vxpool, (vx)) 193 194 #define VNDLABELDEV(dev) \ 195 (MAKEDISKDEV(major((dev)), vndunit((dev)), RAW_PART)) 196 197 /* called by main() at boot time (XXX: and the LKM driver) */ 198 void vndattach(int); 199 200 static void vndclear(struct vnd_softc *, int); 201 static int vndsetcred(struct vnd_softc *, kauth_cred_t); 202 static void vndthrottle(struct vnd_softc *, struct vnode *); 203 static void vndiodone(struct buf *); 204 #if 0 205 static void vndshutdown(void); 206 #endif 207 208 static void vndgetdefaultlabel(struct vnd_softc *, struct disklabel *); 209 static void vndgetdisklabel(dev_t, struct vnd_softc *); 210 211 static int vndlock(struct vnd_softc *); 212 static void vndunlock(struct vnd_softc *); 213 #ifdef VND_COMPRESSION 214 static void compstrategy(struct buf *, off_t); 215 static void *vnd_alloc(void *, u_int, u_int); 216 static void vnd_free(void *, void *); 217 #endif /* VND_COMPRESSION */ 218 219 static void vndthread(void *); 220 static bool vnode_has_op(const struct vnode *, int); 221 static void handle_with_rdwr(struct vnd_softc *, const struct buf *, 222 struct buf *); 223 static void handle_with_strategy(struct vnd_softc *, const struct buf *, 224 struct buf *); 225 static void vnd_set_properties(struct vnd_softc *); 226 227 static dev_type_open(vndopen); 228 static dev_type_close(vndclose); 229 static dev_type_read(vndread); 230 static dev_type_write(vndwrite); 231 static dev_type_ioctl(vndioctl); 232 static dev_type_strategy(vndstrategy); 233 static dev_type_dump(vnddump); 234 static dev_type_size(vndsize); 235 236 const struct bdevsw vnd_bdevsw = { 237 vndopen, vndclose, vndstrategy, vndioctl, vnddump, vndsize, D_DISK 238 }; 239 240 const struct cdevsw vnd_cdevsw = { 241 vndopen, vndclose, vndread, vndwrite, vndioctl, 242 nostop, notty, nopoll, nommap, nokqfilter, D_DISK 243 }; 244 245 static int vnd_match(device_t, cfdata_t, void *); 246 static void vnd_attach(device_t, device_t, void *); 247 static int vnd_detach(device_t, int); 248 249 CFATTACH_DECL_NEW(vnd, sizeof(struct vnd_softc), 250 vnd_match, vnd_attach, vnd_detach, NULL); 251 extern struct cfdriver vnd_cd; 252 253 static struct vnd_softc *vnd_spawn(int); 254 int vnd_destroy(device_t); 255 256 void 257 vndattach(int num) 258 { 259 int error; 260 261 error = config_cfattach_attach(vnd_cd.cd_name, &vnd_ca); 262 if (error) 263 aprint_error("%s: unable to register cfattach\n", 264 vnd_cd.cd_name); 265 } 266 267 static int 268 vnd_match(device_t self, cfdata_t cfdata, void *aux) 269 { 270 271 return 1; 272 } 273 274 static void 275 vnd_attach(device_t parent, device_t self, void *aux) 276 { 277 struct vnd_softc *sc = device_private(self); 278 279 sc->sc_dev = self; 280 sc->sc_comp_offsets = NULL; 281 sc->sc_comp_buff = NULL; 282 sc->sc_comp_decombuf = NULL; 283 bufq_alloc(&sc->sc_tab, "disksort", BUFQ_SORT_RAWBLOCK); 284 disk_init(&sc->sc_dkdev, device_xname(self), NULL); 285 if (!pmf_device_register(self, NULL, NULL)) 286 aprint_error_dev(self, "couldn't establish power handler\n"); 287 } 288 289 static int 290 vnd_detach(device_t self, int flags) 291 { 292 struct vnd_softc *sc = device_private(self); 293 if (sc->sc_flags & VNF_INITED) 294 return EBUSY; 295 296 pmf_device_deregister(self); 297 bufq_free(sc->sc_tab); 298 disk_destroy(&sc->sc_dkdev); 299 300 return 0; 301 } 302 303 static struct vnd_softc * 304 vnd_spawn(int unit) 305 { 306 struct cfdata *cf; 307 308 cf = malloc(sizeof(*cf), M_DEVBUF, M_WAITOK); 309 cf->cf_name = vnd_cd.cd_name; 310 cf->cf_atname = vnd_cd.cd_name; 311 cf->cf_unit = unit; 312 cf->cf_fstate = FSTATE_STAR; 313 314 return device_private(config_attach_pseudo(cf)); 315 } 316 317 int 318 vnd_destroy(device_t dev) 319 { 320 int error; 321 cfdata_t cf; 322 323 cf = device_cfdata(dev); 324 error = config_detach(dev, DETACH_QUIET); 325 if (error) 326 return error; 327 free(cf, M_DEVBUF); 328 return 0; 329 } 330 331 static int 332 vndopen(dev_t dev, int flags, int mode, struct lwp *l) 333 { 334 int unit = vndunit(dev); 335 struct vnd_softc *sc; 336 int error = 0, part, pmask; 337 struct disklabel *lp; 338 339 #ifdef DEBUG 340 if (vnddebug & VDB_FOLLOW) 341 printf("vndopen(0x%x, 0x%x, 0x%x, %p)\n", dev, flags, mode, l); 342 #endif 343 sc = device_private(device_lookup(&vnd_cd, unit)); 344 if (sc == NULL) { 345 sc = vnd_spawn(unit); 346 if (sc == NULL) 347 return ENOMEM; 348 } 349 350 if ((error = vndlock(sc)) != 0) 351 return (error); 352 353 lp = sc->sc_dkdev.dk_label; 354 355 part = DISKPART(dev); 356 pmask = (1 << part); 357 358 /* 359 * If we're initialized, check to see if there are any other 360 * open partitions. If not, then it's safe to update the 361 * in-core disklabel. Only read the disklabel if it is 362 * not already valid. 363 */ 364 if ((sc->sc_flags & (VNF_INITED|VNF_VLABEL)) == VNF_INITED && 365 sc->sc_dkdev.dk_openmask == 0) 366 vndgetdisklabel(dev, sc); 367 368 /* Check that the partitions exists. */ 369 if (part != RAW_PART) { 370 if (((sc->sc_flags & VNF_INITED) == 0) || 371 ((part >= lp->d_npartitions) || 372 (lp->d_partitions[part].p_fstype == FS_UNUSED))) { 373 error = ENXIO; 374 goto done; 375 } 376 } 377 378 /* Prevent our unit from being unconfigured while open. */ 379 switch (mode) { 380 case S_IFCHR: 381 sc->sc_dkdev.dk_copenmask |= pmask; 382 break; 383 384 case S_IFBLK: 385 sc->sc_dkdev.dk_bopenmask |= pmask; 386 break; 387 } 388 sc->sc_dkdev.dk_openmask = 389 sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask; 390 391 done: 392 vndunlock(sc); 393 return (error); 394 } 395 396 static int 397 vndclose(dev_t dev, int flags, int mode, struct lwp *l) 398 { 399 int unit = vndunit(dev); 400 struct vnd_softc *sc; 401 int error = 0, part; 402 403 #ifdef DEBUG 404 if (vnddebug & VDB_FOLLOW) 405 printf("vndclose(0x%x, 0x%x, 0x%x, %p)\n", dev, flags, mode, l); 406 #endif 407 sc = device_private(device_lookup(&vnd_cd, unit)); 408 if (sc == NULL) 409 return ENXIO; 410 411 if ((error = vndlock(sc)) != 0) 412 return (error); 413 414 part = DISKPART(dev); 415 416 /* ...that much closer to allowing unconfiguration... */ 417 switch (mode) { 418 case S_IFCHR: 419 sc->sc_dkdev.dk_copenmask &= ~(1 << part); 420 break; 421 422 case S_IFBLK: 423 sc->sc_dkdev.dk_bopenmask &= ~(1 << part); 424 break; 425 } 426 sc->sc_dkdev.dk_openmask = 427 sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask; 428 429 vndunlock(sc); 430 431 if ((sc->sc_flags & VNF_INITED) == 0) { 432 if ((error = vnd_destroy(sc->sc_dev)) != 0) { 433 aprint_error_dev(sc->sc_dev, 434 "unable to detach instance\n"); 435 return error; 436 } 437 } 438 439 return (0); 440 } 441 442 /* 443 * Queue the request, and wakeup the kernel thread to handle it. 444 */ 445 static void 446 vndstrategy(struct buf *bp) 447 { 448 int unit = vndunit(bp->b_dev); 449 struct vnd_softc *vnd = 450 device_private(device_lookup(&vnd_cd, unit)); 451 struct disklabel *lp = vnd->sc_dkdev.dk_label; 452 daddr_t blkno; 453 int s = splbio(); 454 455 if ((vnd->sc_flags & VNF_INITED) == 0) { 456 bp->b_error = ENXIO; 457 goto done; 458 } 459 460 /* 461 * The transfer must be a whole number of blocks. 462 */ 463 if ((bp->b_bcount % lp->d_secsize) != 0) { 464 bp->b_error = EINVAL; 465 goto done; 466 } 467 468 /* 469 * check if we're read-only. 470 */ 471 if ((vnd->sc_flags & VNF_READONLY) && !(bp->b_flags & B_READ)) { 472 bp->b_error = EACCES; 473 goto done; 474 } 475 476 /* If it's a nil transfer, wake up the top half now. */ 477 if (bp->b_bcount == 0) { 478 goto done; 479 } 480 481 /* 482 * Do bounds checking and adjust transfer. If there's an error, 483 * the bounds check will flag that for us. 484 */ 485 if (DISKPART(bp->b_dev) == RAW_PART) { 486 if (bounds_check_with_mediasize(bp, DEV_BSIZE, 487 vnd->sc_size) <= 0) 488 goto done; 489 } else { 490 if (bounds_check_with_label(&vnd->sc_dkdev, 491 bp, vnd->sc_flags & (VNF_WLABEL|VNF_LABELLING)) <= 0) 492 goto done; 493 } 494 495 /* 496 * Put the block number in terms of the logical blocksize 497 * of the "device". 498 */ 499 500 blkno = bp->b_blkno / (lp->d_secsize / DEV_BSIZE); 501 502 /* 503 * Translate the partition-relative block number to an absolute. 504 */ 505 if (DISKPART(bp->b_dev) != RAW_PART) { 506 struct partition *pp; 507 508 pp = &vnd->sc_dkdev.dk_label->d_partitions[ 509 DISKPART(bp->b_dev)]; 510 blkno += pp->p_offset; 511 } 512 bp->b_rawblkno = blkno; 513 514 #ifdef DEBUG 515 if (vnddebug & VDB_FOLLOW) 516 printf("vndstrategy(%p): unit %d\n", bp, unit); 517 #endif 518 BUFQ_PUT(vnd->sc_tab, bp); 519 wakeup(&vnd->sc_tab); 520 splx(s); 521 return; 522 523 done: 524 bp->b_resid = bp->b_bcount; 525 biodone(bp); 526 splx(s); 527 } 528 529 static void 530 vndthread(void *arg) 531 { 532 struct vnd_softc *vnd = arg; 533 bool usestrategy; 534 int s; 535 536 /* Determine whether we can use VOP_BMAP and VOP_STRATEGY to 537 * directly access the backing vnode. If we can, use these two 538 * operations to avoid messing with the local buffer cache. 539 * Otherwise fall back to regular VOP_READ/VOP_WRITE operations 540 * which are guaranteed to work with any file system. */ 541 usestrategy = vnode_has_op(vnd->sc_vp, VOFFSET(vop_bmap)) && 542 vnode_has_op(vnd->sc_vp, VOFFSET(vop_strategy)); 543 544 #ifdef DEBUG 545 if (vnddebug & VDB_INIT) 546 printf("vndthread: vp %p, %s\n", vnd->sc_vp, 547 usestrategy ? 548 "using bmap/strategy operations" : 549 "using read/write operations"); 550 #endif 551 552 s = splbio(); 553 vnd->sc_flags |= VNF_KTHREAD; 554 wakeup(&vnd->sc_kthread); 555 556 /* 557 * Dequeue requests and serve them depending on the available 558 * vnode operations. 559 */ 560 while ((vnd->sc_flags & VNF_VUNCONF) == 0) { 561 struct vndxfer *vnx; 562 int flags; 563 struct buf *obp; 564 struct buf *bp; 565 566 obp = BUFQ_GET(vnd->sc_tab); 567 if (obp == NULL) { 568 tsleep(&vnd->sc_tab, PRIBIO, "vndbp", 0); 569 continue; 570 }; 571 splx(s); 572 flags = obp->b_flags; 573 #ifdef DEBUG 574 if (vnddebug & VDB_FOLLOW) 575 printf("vndthread(%p\n", obp); 576 #endif 577 578 if (vnd->sc_vp->v_mount == NULL) { 579 obp->b_error = ENXIO; 580 goto done; 581 } 582 #ifdef VND_COMPRESSION 583 /* handle a compressed read */ 584 if ((flags & B_READ) != 0 && (vnd->sc_flags & VNF_COMP)) { 585 off_t bn; 586 587 /* Convert to a byte offset within the file. */ 588 bn = obp->b_rawblkno * 589 vnd->sc_dkdev.dk_label->d_secsize; 590 591 compstrategy(obp, bn); 592 goto done; 593 } 594 #endif /* VND_COMPRESSION */ 595 596 /* 597 * Allocate a header for this transfer and link it to the 598 * buffer 599 */ 600 s = splbio(); 601 vnx = VND_GETXFER(vnd); 602 splx(s); 603 vnx->vx_vnd = vnd; 604 605 s = splbio(); 606 while (vnd->sc_active >= vnd->sc_maxactive) { 607 tsleep(&vnd->sc_tab, PRIBIO, "vndac", 0); 608 } 609 vnd->sc_active++; 610 splx(s); 611 612 /* Instrumentation. */ 613 disk_busy(&vnd->sc_dkdev); 614 615 bp = &vnx->vx_buf; 616 buf_init(bp); 617 bp->b_flags = (obp->b_flags & B_READ); 618 bp->b_oflags = obp->b_oflags; 619 bp->b_cflags = obp->b_cflags; 620 bp->b_iodone = vndiodone; 621 bp->b_private = obp; 622 bp->b_vp = vnd->sc_vp; 623 bp->b_objlock = &bp->b_vp->v_interlock; 624 bp->b_data = obp->b_data; 625 bp->b_bcount = obp->b_bcount; 626 BIO_COPYPRIO(bp, obp); 627 628 /* Handle the request using the appropriate operations. */ 629 if (usestrategy) 630 handle_with_strategy(vnd, obp, bp); 631 else 632 handle_with_rdwr(vnd, obp, bp); 633 634 s = splbio(); 635 continue; 636 637 done: 638 biodone(obp); 639 s = splbio(); 640 } 641 642 vnd->sc_flags &= (~VNF_KTHREAD | VNF_VUNCONF); 643 wakeup(&vnd->sc_kthread); 644 splx(s); 645 kthread_exit(0); 646 } 647 648 /* 649 * Checks if the given vnode supports the requested operation. 650 * The operation is specified the offset returned by VOFFSET. 651 * 652 * XXX The test below used to determine this is quite fragile 653 * because it relies on the file system to use genfs to specify 654 * unimplemented operations. There might be another way to do 655 * it more cleanly. 656 */ 657 static bool 658 vnode_has_op(const struct vnode *vp, int opoffset) 659 { 660 int (*defaultp)(void *); 661 int (*opp)(void *); 662 663 defaultp = vp->v_op[VOFFSET(vop_default)]; 664 opp = vp->v_op[opoffset]; 665 666 return opp != defaultp && opp != genfs_eopnotsupp && 667 opp != genfs_badop && opp != genfs_nullop; 668 } 669 670 /* 671 * Handes the read/write request given in 'bp' using the vnode's VOP_READ 672 * and VOP_WRITE operations. 673 * 674 * 'obp' is a pointer to the original request fed to the vnd device. 675 */ 676 static void 677 handle_with_rdwr(struct vnd_softc *vnd, const struct buf *obp, struct buf *bp) 678 { 679 bool doread; 680 off_t offset; 681 size_t resid; 682 struct vnode *vp; 683 684 doread = bp->b_flags & B_READ; 685 offset = obp->b_rawblkno * vnd->sc_dkdev.dk_label->d_secsize; 686 vp = vnd->sc_vp; 687 688 #if defined(DEBUG) 689 if (vnddebug & VDB_IO) 690 printf("vnd (rdwr): vp %p, %s, rawblkno 0x%" PRIx64 691 ", secsize %d, offset %" PRIu64 692 ", bcount %d\n", 693 vp, doread ? "read" : "write", obp->b_rawblkno, 694 vnd->sc_dkdev.dk_label->d_secsize, offset, 695 bp->b_bcount); 696 #endif 697 698 /* Issue the read or write operation. */ 699 bp->b_error = 700 vn_rdwr(doread ? UIO_READ : UIO_WRITE, 701 vp, bp->b_data, bp->b_bcount, offset, 702 UIO_SYSSPACE, 0, vnd->sc_cred, &resid, NULL); 703 bp->b_resid = resid; 704 705 /* We need to increase the number of outputs on the vnode if 706 * there was any write to it. */ 707 if (!doread) { 708 mutex_enter(&vp->v_interlock); 709 vp->v_numoutput++; 710 mutex_exit(&vp->v_interlock); 711 } 712 713 biodone(bp); 714 } 715 716 /* 717 * Handes the read/write request given in 'bp' using the vnode's VOP_BMAP 718 * and VOP_STRATEGY operations. 719 * 720 * 'obp' is a pointer to the original request fed to the vnd device. 721 */ 722 static void 723 handle_with_strategy(struct vnd_softc *vnd, const struct buf *obp, 724 struct buf *bp) 725 { 726 int bsize, error, flags, skipped; 727 size_t resid, sz; 728 off_t bn, offset; 729 struct vnode *vp; 730 731 flags = obp->b_flags; 732 733 if (!(flags & B_READ)) { 734 vp = bp->b_vp; 735 mutex_enter(&vp->v_interlock); 736 vp->v_numoutput++; 737 mutex_exit(&vp->v_interlock); 738 } 739 740 /* convert to a byte offset within the file. */ 741 bn = obp->b_rawblkno * vnd->sc_dkdev.dk_label->d_secsize; 742 743 bsize = vnd->sc_vp->v_mount->mnt_stat.f_iosize; 744 skipped = 0; 745 746 /* 747 * Break the request into bsize pieces and feed them 748 * sequentially using VOP_BMAP/VOP_STRATEGY. 749 * We do it this way to keep from flooding NFS servers if we 750 * are connected to an NFS file. This places the burden on 751 * the client rather than the server. 752 */ 753 error = 0; 754 bp->b_resid = bp->b_bcount; 755 for (offset = 0, resid = bp->b_resid; resid; 756 resid -= sz, offset += sz) { 757 struct buf *nbp; 758 daddr_t nbn; 759 int off, nra; 760 761 nra = 0; 762 vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY | LK_CANRECURSE); 763 error = VOP_BMAP(vnd->sc_vp, bn / bsize, &vp, &nbn, &nra); 764 VOP_UNLOCK(vnd->sc_vp, 0); 765 766 if (error == 0 && (long)nbn == -1) 767 error = EIO; 768 769 /* 770 * If there was an error or a hole in the file...punt. 771 * Note that we may have to wait for any operations 772 * that we have already fired off before releasing 773 * the buffer. 774 * 775 * XXX we could deal with holes here but it would be 776 * a hassle (in the write case). 777 */ 778 if (error) { 779 skipped += resid; 780 break; 781 } 782 783 #ifdef DEBUG 784 if (!dovndcluster) 785 nra = 0; 786 #endif 787 788 off = bn % bsize; 789 sz = MIN(((off_t)1 + nra) * bsize - off, resid); 790 #ifdef DEBUG 791 if (vnddebug & VDB_IO) 792 printf("vndstrategy: vp %p/%p bn 0x%qx/0x%" PRIx64 793 " sz 0x%zx\n", vnd->sc_vp, vp, (long long)bn, 794 nbn, sz); 795 #endif 796 797 nbp = getiobuf(vp, true); 798 nestiobuf_setup(bp, nbp, offset, sz); 799 nbp->b_blkno = nbn + btodb(off); 800 801 #if 0 /* XXX #ifdef DEBUG */ 802 if (vnddebug & VDB_IO) 803 printf("vndstart(%ld): bp %p vp %p blkno " 804 "0x%" PRIx64 " flags %x addr %p cnt 0x%x\n", 805 (long) (vnd-vnd_softc), &nbp->vb_buf, 806 nbp->vb_buf.b_vp, nbp->vb_buf.b_blkno, 807 nbp->vb_buf.b_flags, nbp->vb_buf.b_data, 808 nbp->vb_buf.b_bcount); 809 #endif 810 VOP_STRATEGY(vp, nbp); 811 bn += sz; 812 } 813 nestiobuf_done(bp, skipped, error); 814 } 815 816 static void 817 vndiodone(struct buf *bp) 818 { 819 struct vndxfer *vnx = VND_BUFTOXFER(bp); 820 struct vnd_softc *vnd = vnx->vx_vnd; 821 struct buf *obp = bp->b_private; 822 823 KASSERT(&vnx->vx_buf == bp); 824 KASSERT(vnd->sc_active > 0); 825 #ifdef DEBUG 826 if (vnddebug & VDB_IO) { 827 printf("vndiodone1: bp %p iodone: error %d\n", 828 bp, bp->b_error); 829 } 830 #endif 831 disk_unbusy(&vnd->sc_dkdev, bp->b_bcount - bp->b_resid, 832 (bp->b_flags & B_READ)); 833 vnd->sc_active--; 834 if (vnd->sc_active == 0) { 835 wakeup(&vnd->sc_tab); 836 } 837 obp->b_error = bp->b_error; 838 obp->b_resid = bp->b_resid; 839 VND_PUTXFER(vnd, vnx); 840 biodone(obp); 841 } 842 843 /* ARGSUSED */ 844 static int 845 vndread(dev_t dev, struct uio *uio, int flags) 846 { 847 int unit = vndunit(dev); 848 struct vnd_softc *sc; 849 850 #ifdef DEBUG 851 if (vnddebug & VDB_FOLLOW) 852 printf("vndread(0x%x, %p)\n", dev, uio); 853 #endif 854 855 sc = device_private(device_lookup(&vnd_cd, unit)); 856 if (sc == NULL) 857 return ENXIO; 858 859 if ((sc->sc_flags & VNF_INITED) == 0) 860 return (ENXIO); 861 862 return (physio(vndstrategy, NULL, dev, B_READ, minphys, uio)); 863 } 864 865 /* ARGSUSED */ 866 static int 867 vndwrite(dev_t dev, struct uio *uio, int flags) 868 { 869 int unit = vndunit(dev); 870 struct vnd_softc *sc; 871 872 #ifdef DEBUG 873 if (vnddebug & VDB_FOLLOW) 874 printf("vndwrite(0x%x, %p)\n", dev, uio); 875 #endif 876 877 sc = device_private(device_lookup(&vnd_cd, unit)); 878 if (sc == NULL) 879 return ENXIO; 880 881 if ((sc->sc_flags & VNF_INITED) == 0) 882 return (ENXIO); 883 884 return (physio(vndstrategy, NULL, dev, B_WRITE, minphys, uio)); 885 } 886 887 static int 888 vnd_cget(struct lwp *l, int unit, int *un, struct vattr *va) 889 { 890 struct vnd_softc *vnd; 891 892 if (*un == -1) 893 *un = unit; 894 if (*un < 0) 895 return EINVAL; 896 897 vnd = device_private(device_lookup(&vnd_cd, *un)); 898 if (vnd == NULL) 899 return (*un >= vnd_cd.cd_ndevs) ? ENXIO : -1; 900 901 if ((vnd->sc_flags & VNF_INITED) == 0) 902 return -1; 903 904 return VOP_GETATTR(vnd->sc_vp, va, l->l_cred); 905 } 906 907 /* ARGSUSED */ 908 static int 909 vndioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l) 910 { 911 int unit = vndunit(dev); 912 struct vnd_softc *vnd; 913 struct vnd_ioctl *vio; 914 struct vattr vattr; 915 struct nameidata nd; 916 int error, part, pmask; 917 size_t geomsize; 918 int fflags; 919 #ifdef __HAVE_OLD_DISKLABEL 920 struct disklabel newlabel; 921 #endif 922 923 #ifdef DEBUG 924 if (vnddebug & VDB_FOLLOW) 925 printf("vndioctl(0x%x, 0x%lx, %p, 0x%x, %p): unit %d\n", 926 dev, cmd, data, flag, l->l_proc, unit); 927 #endif 928 vnd = device_private(device_lookup(&vnd_cd, unit)); 929 if (vnd == NULL && 930 #ifdef COMPAT_30 931 cmd != VNDIOOCGET && 932 #endif 933 cmd != VNDIOCGET) 934 return ENXIO; 935 vio = (struct vnd_ioctl *)data; 936 937 /* Must be open for writes for these commands... */ 938 switch (cmd) { 939 case VNDIOCSET: 940 case VNDIOCCLR: 941 case DIOCSDINFO: 942 case DIOCWDINFO: 943 #ifdef __HAVE_OLD_DISKLABEL 944 case ODIOCSDINFO: 945 case ODIOCWDINFO: 946 #endif 947 case DIOCKLABEL: 948 case DIOCWLABEL: 949 if ((flag & FWRITE) == 0) 950 return (EBADF); 951 } 952 953 /* Must be initialized for these... */ 954 switch (cmd) { 955 case VNDIOCCLR: 956 case DIOCGDINFO: 957 case DIOCSDINFO: 958 case DIOCWDINFO: 959 case DIOCGPART: 960 case DIOCKLABEL: 961 case DIOCWLABEL: 962 case DIOCGDEFLABEL: 963 #ifdef __HAVE_OLD_DISKLABEL 964 case ODIOCGDINFO: 965 case ODIOCSDINFO: 966 case ODIOCWDINFO: 967 case ODIOCGDEFLABEL: 968 #endif 969 if ((vnd->sc_flags & VNF_INITED) == 0) 970 return (ENXIO); 971 } 972 973 switch (cmd) { 974 case VNDIOCSET: 975 if (vnd->sc_flags & VNF_INITED) 976 return (EBUSY); 977 978 if ((error = vndlock(vnd)) != 0) 979 return (error); 980 981 fflags = FREAD; 982 if ((vio->vnd_flags & VNDIOF_READONLY) == 0) 983 fflags |= FWRITE; 984 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, vio->vnd_file); 985 if ((error = vn_open(&nd, fflags, 0)) != 0) 986 goto unlock_and_exit; 987 KASSERT(l); 988 error = VOP_GETATTR(nd.ni_vp, &vattr, l->l_cred); 989 if (!error && nd.ni_vp->v_type != VREG) 990 error = EOPNOTSUPP; 991 if (error) { 992 VOP_UNLOCK(nd.ni_vp, 0); 993 goto close_and_exit; 994 } 995 996 /* If using a compressed file, initialize its info */ 997 /* (or abort with an error if kernel has no compression) */ 998 if (vio->vnd_flags & VNF_COMP) { 999 #ifdef VND_COMPRESSION 1000 struct vnd_comp_header *ch; 1001 int i; 1002 u_int32_t comp_size; 1003 u_int32_t comp_maxsize; 1004 1005 /* allocate space for compresed file header */ 1006 ch = malloc(sizeof(struct vnd_comp_header), 1007 M_TEMP, M_WAITOK); 1008 1009 /* read compressed file header */ 1010 error = vn_rdwr(UIO_READ, nd.ni_vp, (void *)ch, 1011 sizeof(struct vnd_comp_header), 0, UIO_SYSSPACE, 1012 IO_UNIT|IO_NODELOCKED, l->l_cred, NULL, NULL); 1013 if(error) { 1014 free(ch, M_TEMP); 1015 VOP_UNLOCK(nd.ni_vp, 0); 1016 goto close_and_exit; 1017 } 1018 1019 /* save some header info */ 1020 vnd->sc_comp_blksz = ntohl(ch->block_size); 1021 /* note last offset is the file byte size */ 1022 vnd->sc_comp_numoffs = ntohl(ch->num_blocks)+1; 1023 free(ch, M_TEMP); 1024 if (vnd->sc_comp_blksz == 0 || 1025 vnd->sc_comp_blksz % DEV_BSIZE !=0) { 1026 VOP_UNLOCK(nd.ni_vp, 0); 1027 error = EINVAL; 1028 goto close_and_exit; 1029 } 1030 if(sizeof(struct vnd_comp_header) + 1031 sizeof(u_int64_t) * vnd->sc_comp_numoffs > 1032 vattr.va_size) { 1033 VOP_UNLOCK(nd.ni_vp, 0); 1034 error = EINVAL; 1035 goto close_and_exit; 1036 } 1037 1038 /* set decompressed file size */ 1039 vattr.va_size = 1040 ((u_quad_t)vnd->sc_comp_numoffs - 1) * 1041 (u_quad_t)vnd->sc_comp_blksz; 1042 1043 /* allocate space for all the compressed offsets */ 1044 vnd->sc_comp_offsets = 1045 malloc(sizeof(u_int64_t) * vnd->sc_comp_numoffs, 1046 M_DEVBUF, M_WAITOK); 1047 1048 /* read in the offsets */ 1049 error = vn_rdwr(UIO_READ, nd.ni_vp, 1050 (void *)vnd->sc_comp_offsets, 1051 sizeof(u_int64_t) * vnd->sc_comp_numoffs, 1052 sizeof(struct vnd_comp_header), UIO_SYSSPACE, 1053 IO_UNIT|IO_NODELOCKED, l->l_cred, NULL, NULL); 1054 if(error) { 1055 VOP_UNLOCK(nd.ni_vp, 0); 1056 goto close_and_exit; 1057 } 1058 /* 1059 * find largest block size (used for allocation limit). 1060 * Also convert offset to native byte order. 1061 */ 1062 comp_maxsize = 0; 1063 for (i = 0; i < vnd->sc_comp_numoffs - 1; i++) { 1064 vnd->sc_comp_offsets[i] = 1065 be64toh(vnd->sc_comp_offsets[i]); 1066 comp_size = be64toh(vnd->sc_comp_offsets[i + 1]) 1067 - vnd->sc_comp_offsets[i]; 1068 if (comp_size > comp_maxsize) 1069 comp_maxsize = comp_size; 1070 } 1071 vnd->sc_comp_offsets[vnd->sc_comp_numoffs - 1] = 1072 be64toh(vnd->sc_comp_offsets[vnd->sc_comp_numoffs - 1]); 1073 1074 /* create compressed data buffer */ 1075 vnd->sc_comp_buff = malloc(comp_maxsize, 1076 M_DEVBUF, M_WAITOK); 1077 1078 /* create decompressed buffer */ 1079 vnd->sc_comp_decombuf = malloc(vnd->sc_comp_blksz, 1080 M_DEVBUF, M_WAITOK); 1081 vnd->sc_comp_buffblk = -1; 1082 1083 /* Initialize decompress stream */ 1084 bzero(&vnd->sc_comp_stream, sizeof(z_stream)); 1085 vnd->sc_comp_stream.zalloc = vnd_alloc; 1086 vnd->sc_comp_stream.zfree = vnd_free; 1087 error = inflateInit2(&vnd->sc_comp_stream, MAX_WBITS); 1088 if(error) { 1089 if(vnd->sc_comp_stream.msg) 1090 printf("vnd%d: compressed file, %s\n", 1091 unit, vnd->sc_comp_stream.msg); 1092 VOP_UNLOCK(nd.ni_vp, 0); 1093 error = EINVAL; 1094 goto close_and_exit; 1095 } 1096 1097 vnd->sc_flags |= VNF_COMP | VNF_READONLY; 1098 #else /* !VND_COMPRESSION */ 1099 VOP_UNLOCK(nd.ni_vp, 0); 1100 error = EOPNOTSUPP; 1101 goto close_and_exit; 1102 #endif /* VND_COMPRESSION */ 1103 } 1104 1105 VOP_UNLOCK(nd.ni_vp, 0); 1106 vnd->sc_vp = nd.ni_vp; 1107 vnd->sc_size = btodb(vattr.va_size); /* note truncation */ 1108 1109 /* 1110 * Use pseudo-geometry specified. If none was provided, 1111 * use "standard" Adaptec fictitious geometry. 1112 */ 1113 if (vio->vnd_flags & VNDIOF_HASGEOM) { 1114 1115 memcpy(&vnd->sc_geom, &vio->vnd_geom, 1116 sizeof(vio->vnd_geom)); 1117 1118 /* 1119 * Sanity-check the sector size. 1120 * XXX Don't allow secsize < DEV_BSIZE. Should 1121 * XXX we? 1122 */ 1123 if (vnd->sc_geom.vng_secsize < DEV_BSIZE || 1124 (vnd->sc_geom.vng_secsize % DEV_BSIZE) != 0 || 1125 vnd->sc_geom.vng_ncylinders == 0 || 1126 (vnd->sc_geom.vng_ntracks * 1127 vnd->sc_geom.vng_nsectors) == 0) { 1128 error = EINVAL; 1129 goto close_and_exit; 1130 } 1131 1132 /* 1133 * Compute the size (in DEV_BSIZE blocks) specified 1134 * by the geometry. 1135 */ 1136 geomsize = (vnd->sc_geom.vng_nsectors * 1137 vnd->sc_geom.vng_ntracks * 1138 vnd->sc_geom.vng_ncylinders) * 1139 (vnd->sc_geom.vng_secsize / DEV_BSIZE); 1140 1141 /* 1142 * Sanity-check the size against the specified 1143 * geometry. 1144 */ 1145 if (vnd->sc_size < geomsize) { 1146 error = EINVAL; 1147 goto close_and_exit; 1148 } 1149 } else if (vnd->sc_size >= (32 * 64)) { 1150 /* 1151 * Size must be at least 2048 DEV_BSIZE blocks 1152 * (1M) in order to use this geometry. 1153 */ 1154 vnd->sc_geom.vng_secsize = DEV_BSIZE; 1155 vnd->sc_geom.vng_nsectors = 32; 1156 vnd->sc_geom.vng_ntracks = 64; 1157 vnd->sc_geom.vng_ncylinders = vnd->sc_size / (64 * 32); 1158 } else { 1159 vnd->sc_geom.vng_secsize = DEV_BSIZE; 1160 vnd->sc_geom.vng_nsectors = 1; 1161 vnd->sc_geom.vng_ntracks = 1; 1162 vnd->sc_geom.vng_ncylinders = vnd->sc_size; 1163 } 1164 1165 vnd_set_properties(vnd); 1166 1167 if (vio->vnd_flags & VNDIOF_READONLY) { 1168 vnd->sc_flags |= VNF_READONLY; 1169 } 1170 1171 if ((error = vndsetcred(vnd, l->l_cred)) != 0) 1172 goto close_and_exit; 1173 1174 vndthrottle(vnd, vnd->sc_vp); 1175 vio->vnd_size = dbtob(vnd->sc_size); 1176 vnd->sc_flags |= VNF_INITED; 1177 1178 /* create the kernel thread, wait for it to be up */ 1179 error = kthread_create(PRI_NONE, 0, NULL, vndthread, vnd, 1180 &vnd->sc_kthread, device_xname(vnd->sc_dev)); 1181 if (error) 1182 goto close_and_exit; 1183 while ((vnd->sc_flags & VNF_KTHREAD) == 0) { 1184 tsleep(&vnd->sc_kthread, PRIBIO, "vndthr", 0); 1185 } 1186 #ifdef DEBUG 1187 if (vnddebug & VDB_INIT) 1188 printf("vndioctl: SET vp %p size 0x%lx %d/%d/%d/%d\n", 1189 vnd->sc_vp, (unsigned long) vnd->sc_size, 1190 vnd->sc_geom.vng_secsize, 1191 vnd->sc_geom.vng_nsectors, 1192 vnd->sc_geom.vng_ntracks, 1193 vnd->sc_geom.vng_ncylinders); 1194 #endif 1195 1196 /* Attach the disk. */ 1197 disk_attach(&vnd->sc_dkdev); 1198 1199 /* Initialize the xfer and buffer pools. */ 1200 pool_init(&vnd->sc_vxpool, sizeof(struct vndxfer), 0, 1201 0, 0, "vndxpl", NULL, IPL_BIO); 1202 1203 /* Try and read the disklabel. */ 1204 vndgetdisklabel(dev, vnd); 1205 1206 vndunlock(vnd); 1207 1208 break; 1209 1210 close_and_exit: 1211 (void) vn_close(nd.ni_vp, fflags, l->l_cred); 1212 unlock_and_exit: 1213 #ifdef VND_COMPRESSION 1214 /* free any allocated memory (for compressed file) */ 1215 if(vnd->sc_comp_offsets) { 1216 free(vnd->sc_comp_offsets, M_DEVBUF); 1217 vnd->sc_comp_offsets = NULL; 1218 } 1219 if(vnd->sc_comp_buff) { 1220 free(vnd->sc_comp_buff, M_DEVBUF); 1221 vnd->sc_comp_buff = NULL; 1222 } 1223 if(vnd->sc_comp_decombuf) { 1224 free(vnd->sc_comp_decombuf, M_DEVBUF); 1225 vnd->sc_comp_decombuf = NULL; 1226 } 1227 #endif /* VND_COMPRESSION */ 1228 vndunlock(vnd); 1229 return (error); 1230 1231 case VNDIOCCLR: 1232 if ((error = vndlock(vnd)) != 0) 1233 return (error); 1234 1235 /* 1236 * Don't unconfigure if any other partitions are open 1237 * or if both the character and block flavors of this 1238 * partition are open. 1239 */ 1240 part = DISKPART(dev); 1241 pmask = (1 << part); 1242 if (((vnd->sc_dkdev.dk_openmask & ~pmask) || 1243 ((vnd->sc_dkdev.dk_bopenmask & pmask) && 1244 (vnd->sc_dkdev.dk_copenmask & pmask))) && 1245 !(vio->vnd_flags & VNDIOF_FORCE)) { 1246 vndunlock(vnd); 1247 return (EBUSY); 1248 } 1249 1250 /* 1251 * XXX vndclear() might call vndclose() implicitely; 1252 * release lock to avoid recursion 1253 */ 1254 vndunlock(vnd); 1255 vndclear(vnd, minor(dev)); 1256 #ifdef DEBUG 1257 if (vnddebug & VDB_INIT) 1258 printf("vndioctl: CLRed\n"); 1259 #endif 1260 1261 /* Destroy the xfer and buffer pools. */ 1262 pool_destroy(&vnd->sc_vxpool); 1263 1264 /* Detatch the disk. */ 1265 disk_detach(&vnd->sc_dkdev); 1266 break; 1267 1268 #ifdef COMPAT_30 1269 case VNDIOOCGET: { 1270 struct vnd_ouser *vnu; 1271 struct vattr va; 1272 vnu = (struct vnd_ouser *)data; 1273 KASSERT(l); 1274 switch (error = vnd_cget(l, unit, &vnu->vnu_unit, &va)) { 1275 case 0: 1276 vnu->vnu_dev = va.va_fsid; 1277 vnu->vnu_ino = va.va_fileid; 1278 break; 1279 case -1: 1280 /* unused is not an error */ 1281 vnu->vnu_dev = 0; 1282 vnu->vnu_ino = 0; 1283 break; 1284 default: 1285 return error; 1286 } 1287 break; 1288 } 1289 #endif 1290 case VNDIOCGET: { 1291 struct vnd_user *vnu; 1292 struct vattr va; 1293 vnu = (struct vnd_user *)data; 1294 KASSERT(l); 1295 switch (error = vnd_cget(l, unit, &vnu->vnu_unit, &va)) { 1296 case 0: 1297 vnu->vnu_dev = va.va_fsid; 1298 vnu->vnu_ino = va.va_fileid; 1299 break; 1300 case -1: 1301 /* unused is not an error */ 1302 vnu->vnu_dev = 0; 1303 vnu->vnu_ino = 0; 1304 break; 1305 default: 1306 return error; 1307 } 1308 break; 1309 } 1310 1311 case DIOCGDINFO: 1312 *(struct disklabel *)data = *(vnd->sc_dkdev.dk_label); 1313 break; 1314 1315 #ifdef __HAVE_OLD_DISKLABEL 1316 case ODIOCGDINFO: 1317 newlabel = *(vnd->sc_dkdev.dk_label); 1318 if (newlabel.d_npartitions > OLDMAXPARTITIONS) 1319 return ENOTTY; 1320 memcpy(data, &newlabel, sizeof (struct olddisklabel)); 1321 break; 1322 #endif 1323 1324 case DIOCGPART: 1325 ((struct partinfo *)data)->disklab = vnd->sc_dkdev.dk_label; 1326 ((struct partinfo *)data)->part = 1327 &vnd->sc_dkdev.dk_label->d_partitions[DISKPART(dev)]; 1328 break; 1329 1330 case DIOCWDINFO: 1331 case DIOCSDINFO: 1332 #ifdef __HAVE_OLD_DISKLABEL 1333 case ODIOCWDINFO: 1334 case ODIOCSDINFO: 1335 #endif 1336 { 1337 struct disklabel *lp; 1338 1339 if ((error = vndlock(vnd)) != 0) 1340 return (error); 1341 1342 vnd->sc_flags |= VNF_LABELLING; 1343 1344 #ifdef __HAVE_OLD_DISKLABEL 1345 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) { 1346 memset(&newlabel, 0, sizeof newlabel); 1347 memcpy(&newlabel, data, sizeof (struct olddisklabel)); 1348 lp = &newlabel; 1349 } else 1350 #endif 1351 lp = (struct disklabel *)data; 1352 1353 error = setdisklabel(vnd->sc_dkdev.dk_label, 1354 lp, 0, vnd->sc_dkdev.dk_cpulabel); 1355 if (error == 0) { 1356 if (cmd == DIOCWDINFO 1357 #ifdef __HAVE_OLD_DISKLABEL 1358 || cmd == ODIOCWDINFO 1359 #endif 1360 ) 1361 error = writedisklabel(VNDLABELDEV(dev), 1362 vndstrategy, vnd->sc_dkdev.dk_label, 1363 vnd->sc_dkdev.dk_cpulabel); 1364 } 1365 1366 vnd->sc_flags &= ~VNF_LABELLING; 1367 1368 vndunlock(vnd); 1369 1370 if (error) 1371 return (error); 1372 break; 1373 } 1374 1375 case DIOCKLABEL: 1376 if (*(int *)data != 0) 1377 vnd->sc_flags |= VNF_KLABEL; 1378 else 1379 vnd->sc_flags &= ~VNF_KLABEL; 1380 break; 1381 1382 case DIOCWLABEL: 1383 if (*(int *)data != 0) 1384 vnd->sc_flags |= VNF_WLABEL; 1385 else 1386 vnd->sc_flags &= ~VNF_WLABEL; 1387 break; 1388 1389 case DIOCGDEFLABEL: 1390 vndgetdefaultlabel(vnd, (struct disklabel *)data); 1391 break; 1392 1393 #ifdef __HAVE_OLD_DISKLABEL 1394 case ODIOCGDEFLABEL: 1395 vndgetdefaultlabel(vnd, &newlabel); 1396 if (newlabel.d_npartitions > OLDMAXPARTITIONS) 1397 return ENOTTY; 1398 memcpy(data, &newlabel, sizeof (struct olddisklabel)); 1399 break; 1400 #endif 1401 1402 default: 1403 return (ENOTTY); 1404 } 1405 1406 return (0); 1407 } 1408 1409 /* 1410 * Duplicate the current processes' credentials. Since we are called only 1411 * as the result of a SET ioctl and only root can do that, any future access 1412 * to this "disk" is essentially as root. Note that credentials may change 1413 * if some other uid can write directly to the mapped file (NFS). 1414 */ 1415 static int 1416 vndsetcred(struct vnd_softc *vnd, kauth_cred_t cred) 1417 { 1418 struct uio auio; 1419 struct iovec aiov; 1420 char *tmpbuf; 1421 int error; 1422 1423 vnd->sc_cred = kauth_cred_dup(cred); 1424 tmpbuf = malloc(DEV_BSIZE, M_TEMP, M_WAITOK); 1425 1426 /* XXX: Horrible kludge to establish credentials for NFS */ 1427 aiov.iov_base = tmpbuf; 1428 aiov.iov_len = min(DEV_BSIZE, dbtob(vnd->sc_size)); 1429 auio.uio_iov = &aiov; 1430 auio.uio_iovcnt = 1; 1431 auio.uio_offset = 0; 1432 auio.uio_rw = UIO_READ; 1433 auio.uio_resid = aiov.iov_len; 1434 UIO_SETUP_SYSSPACE(&auio); 1435 vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY); 1436 error = VOP_READ(vnd->sc_vp, &auio, 0, vnd->sc_cred); 1437 if (error == 0) { 1438 /* 1439 * Because vnd does all IO directly through the vnode 1440 * we need to flush (at least) the buffer from the above 1441 * VOP_READ from the buffer cache to prevent cache 1442 * incoherencies. Also, be careful to write dirty 1443 * buffers back to stable storage. 1444 */ 1445 error = vinvalbuf(vnd->sc_vp, V_SAVE, vnd->sc_cred, 1446 curlwp, 0, 0); 1447 } 1448 VOP_UNLOCK(vnd->sc_vp, 0); 1449 1450 free(tmpbuf, M_TEMP); 1451 return (error); 1452 } 1453 1454 /* 1455 * Set maxactive based on FS type 1456 */ 1457 static void 1458 vndthrottle(struct vnd_softc *vnd, struct vnode *vp) 1459 { 1460 #ifdef NFS 1461 extern int (**nfsv2_vnodeop_p)(void *); 1462 1463 if (vp->v_op == nfsv2_vnodeop_p) 1464 vnd->sc_maxactive = 2; 1465 else 1466 #endif 1467 vnd->sc_maxactive = 8; 1468 1469 if (vnd->sc_maxactive < 1) 1470 vnd->sc_maxactive = 1; 1471 } 1472 1473 #if 0 1474 static void 1475 vndshutdown(void) 1476 { 1477 struct vnd_softc *vnd; 1478 1479 for (vnd = &vnd_softc[0]; vnd < &vnd_softc[numvnd]; vnd++) 1480 if (vnd->sc_flags & VNF_INITED) 1481 vndclear(vnd); 1482 } 1483 #endif 1484 1485 static void 1486 vndclear(struct vnd_softc *vnd, int myminor) 1487 { 1488 struct vnode *vp = vnd->sc_vp; 1489 int fflags = FREAD; 1490 int bmaj, cmaj, i, mn; 1491 int s; 1492 1493 #ifdef DEBUG 1494 if (vnddebug & VDB_FOLLOW) 1495 printf("vndclear(%p): vp %p\n", vnd, vp); 1496 #endif 1497 /* locate the major number */ 1498 bmaj = bdevsw_lookup_major(&vnd_bdevsw); 1499 cmaj = cdevsw_lookup_major(&vnd_cdevsw); 1500 1501 /* Nuke the vnodes for any open instances */ 1502 for (i = 0; i < MAXPARTITIONS; i++) { 1503 mn = DISKMINOR(device_unit(vnd->sc_dev), i); 1504 vdevgone(bmaj, mn, mn, VBLK); 1505 if (mn != myminor) /* XXX avoid to kill own vnode */ 1506 vdevgone(cmaj, mn, mn, VCHR); 1507 } 1508 1509 if ((vnd->sc_flags & VNF_READONLY) == 0) 1510 fflags |= FWRITE; 1511 1512 s = splbio(); 1513 bufq_drain(vnd->sc_tab); 1514 splx(s); 1515 1516 vnd->sc_flags |= VNF_VUNCONF; 1517 wakeup(&vnd->sc_tab); 1518 while (vnd->sc_flags & VNF_KTHREAD) 1519 tsleep(&vnd->sc_kthread, PRIBIO, "vnthr", 0); 1520 1521 #ifdef VND_COMPRESSION 1522 /* free the compressed file buffers */ 1523 if(vnd->sc_flags & VNF_COMP) { 1524 if(vnd->sc_comp_offsets) { 1525 free(vnd->sc_comp_offsets, M_DEVBUF); 1526 vnd->sc_comp_offsets = NULL; 1527 } 1528 if(vnd->sc_comp_buff) { 1529 free(vnd->sc_comp_buff, M_DEVBUF); 1530 vnd->sc_comp_buff = NULL; 1531 } 1532 if(vnd->sc_comp_decombuf) { 1533 free(vnd->sc_comp_decombuf, M_DEVBUF); 1534 vnd->sc_comp_decombuf = NULL; 1535 } 1536 } 1537 #endif /* VND_COMPRESSION */ 1538 vnd->sc_flags &= 1539 ~(VNF_INITED | VNF_READONLY | VNF_VLABEL 1540 | VNF_VUNCONF | VNF_COMP); 1541 if (vp == (struct vnode *)0) 1542 panic("vndclear: null vp"); 1543 (void) vn_close(vp, fflags, vnd->sc_cred); 1544 kauth_cred_free(vnd->sc_cred); 1545 vnd->sc_vp = (struct vnode *)0; 1546 vnd->sc_cred = (kauth_cred_t)0; 1547 vnd->sc_size = 0; 1548 } 1549 1550 static int 1551 vndsize(dev_t dev) 1552 { 1553 struct vnd_softc *sc; 1554 struct disklabel *lp; 1555 int part, unit, omask; 1556 int size; 1557 1558 unit = vndunit(dev); 1559 sc = device_private(device_lookup(&vnd_cd, unit)); 1560 if (sc == NULL) 1561 return -1; 1562 1563 if ((sc->sc_flags & VNF_INITED) == 0) 1564 return (-1); 1565 1566 part = DISKPART(dev); 1567 omask = sc->sc_dkdev.dk_openmask & (1 << part); 1568 lp = sc->sc_dkdev.dk_label; 1569 1570 if (omask == 0 && vndopen(dev, 0, S_IFBLK, curlwp)) /* XXX */ 1571 return (-1); 1572 1573 if (lp->d_partitions[part].p_fstype != FS_SWAP) 1574 size = -1; 1575 else 1576 size = lp->d_partitions[part].p_size * 1577 (lp->d_secsize / DEV_BSIZE); 1578 1579 if (omask == 0 && vndclose(dev, 0, S_IFBLK, curlwp)) /* XXX */ 1580 return (-1); 1581 1582 return (size); 1583 } 1584 1585 static int 1586 vnddump(dev_t dev, daddr_t blkno, void *va, 1587 size_t size) 1588 { 1589 1590 /* Not implemented. */ 1591 return ENXIO; 1592 } 1593 1594 static void 1595 vndgetdefaultlabel(struct vnd_softc *sc, struct disklabel *lp) 1596 { 1597 struct vndgeom *vng = &sc->sc_geom; 1598 struct partition *pp; 1599 1600 memset(lp, 0, sizeof(*lp)); 1601 1602 lp->d_secperunit = sc->sc_size / (vng->vng_secsize / DEV_BSIZE); 1603 lp->d_secsize = vng->vng_secsize; 1604 lp->d_nsectors = vng->vng_nsectors; 1605 lp->d_ntracks = vng->vng_ntracks; 1606 lp->d_ncylinders = vng->vng_ncylinders; 1607 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors; 1608 1609 strncpy(lp->d_typename, "vnd", sizeof(lp->d_typename)); 1610 lp->d_type = DTYPE_VND; 1611 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname)); 1612 lp->d_rpm = 3600; 1613 lp->d_interleave = 1; 1614 lp->d_flags = 0; 1615 1616 pp = &lp->d_partitions[RAW_PART]; 1617 pp->p_offset = 0; 1618 pp->p_size = lp->d_secperunit; 1619 pp->p_fstype = FS_UNUSED; 1620 lp->d_npartitions = RAW_PART + 1; 1621 1622 lp->d_magic = DISKMAGIC; 1623 lp->d_magic2 = DISKMAGIC; 1624 lp->d_checksum = dkcksum(lp); 1625 } 1626 1627 /* 1628 * Read the disklabel from a vnd. If one is not present, create a fake one. 1629 */ 1630 static void 1631 vndgetdisklabel(dev_t dev, struct vnd_softc *sc) 1632 { 1633 const char *errstring; 1634 struct disklabel *lp = sc->sc_dkdev.dk_label; 1635 struct cpu_disklabel *clp = sc->sc_dkdev.dk_cpulabel; 1636 int i; 1637 1638 memset(clp, 0, sizeof(*clp)); 1639 1640 vndgetdefaultlabel(sc, lp); 1641 1642 /* 1643 * Call the generic disklabel extraction routine. 1644 */ 1645 errstring = readdisklabel(VNDLABELDEV(dev), vndstrategy, lp, clp); 1646 if (errstring) { 1647 /* 1648 * Lack of disklabel is common, but we print the warning 1649 * anyway, since it might contain other useful information. 1650 */ 1651 aprint_normal_dev(sc->sc_dev, "%s\n", errstring); 1652 1653 /* 1654 * For historical reasons, if there's no disklabel 1655 * present, all partitions must be FS_BSDFFS and 1656 * occupy the entire disk. 1657 */ 1658 for (i = 0; i < MAXPARTITIONS; i++) { 1659 /* 1660 * Don't wipe out port specific hack (such as 1661 * dos partition hack of i386 port). 1662 */ 1663 if (lp->d_partitions[i].p_size != 0) 1664 continue; 1665 1666 lp->d_partitions[i].p_size = lp->d_secperunit; 1667 lp->d_partitions[i].p_offset = 0; 1668 lp->d_partitions[i].p_fstype = FS_BSDFFS; 1669 } 1670 1671 strncpy(lp->d_packname, "default label", 1672 sizeof(lp->d_packname)); 1673 1674 lp->d_npartitions = MAXPARTITIONS; 1675 lp->d_checksum = dkcksum(lp); 1676 } 1677 1678 /* In-core label now valid. */ 1679 sc->sc_flags |= VNF_VLABEL; 1680 } 1681 1682 /* 1683 * Wait interruptibly for an exclusive lock. 1684 * 1685 * XXX 1686 * Several drivers do this; it should be abstracted and made MP-safe. 1687 */ 1688 static int 1689 vndlock(struct vnd_softc *sc) 1690 { 1691 int error; 1692 1693 while ((sc->sc_flags & VNF_LOCKED) != 0) { 1694 sc->sc_flags |= VNF_WANTED; 1695 if ((error = tsleep(sc, PRIBIO | PCATCH, "vndlck", 0)) != 0) 1696 return (error); 1697 } 1698 sc->sc_flags |= VNF_LOCKED; 1699 return (0); 1700 } 1701 1702 /* 1703 * Unlock and wake up any waiters. 1704 */ 1705 static void 1706 vndunlock(struct vnd_softc *sc) 1707 { 1708 1709 sc->sc_flags &= ~VNF_LOCKED; 1710 if ((sc->sc_flags & VNF_WANTED) != 0) { 1711 sc->sc_flags &= ~VNF_WANTED; 1712 wakeup(sc); 1713 } 1714 } 1715 1716 #ifdef VND_COMPRESSION 1717 /* compressed file read */ 1718 static void 1719 compstrategy(struct buf *bp, off_t bn) 1720 { 1721 int error; 1722 int unit = vndunit(bp->b_dev); 1723 struct vnd_softc *vnd = 1724 device_private(device_lookup(&vnd_cd, unit)); 1725 u_int32_t comp_block; 1726 struct uio auio; 1727 char *addr; 1728 int s; 1729 1730 /* set up constants for data move */ 1731 auio.uio_rw = UIO_READ; 1732 UIO_SETUP_SYSSPACE(&auio); 1733 1734 /* read, and transfer the data */ 1735 addr = bp->b_data; 1736 bp->b_resid = bp->b_bcount; 1737 s = splbio(); 1738 while (bp->b_resid > 0) { 1739 unsigned length; 1740 size_t length_in_buffer; 1741 u_int32_t offset_in_buffer; 1742 struct iovec aiov; 1743 1744 /* calculate the compressed block number */ 1745 comp_block = bn / (off_t)vnd->sc_comp_blksz; 1746 1747 /* check for good block number */ 1748 if (comp_block >= vnd->sc_comp_numoffs) { 1749 bp->b_error = EINVAL; 1750 splx(s); 1751 return; 1752 } 1753 1754 /* read in the compressed block, if not in buffer */ 1755 if (comp_block != vnd->sc_comp_buffblk) { 1756 length = vnd->sc_comp_offsets[comp_block + 1] - 1757 vnd->sc_comp_offsets[comp_block]; 1758 vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY); 1759 error = vn_rdwr(UIO_READ, vnd->sc_vp, vnd->sc_comp_buff, 1760 length, vnd->sc_comp_offsets[comp_block], 1761 UIO_SYSSPACE, IO_UNIT, vnd->sc_cred, NULL, NULL); 1762 if (error) { 1763 bp->b_error = error; 1764 VOP_UNLOCK(vnd->sc_vp, 0); 1765 splx(s); 1766 return; 1767 } 1768 /* uncompress the buffer */ 1769 vnd->sc_comp_stream.next_in = vnd->sc_comp_buff; 1770 vnd->sc_comp_stream.avail_in = length; 1771 vnd->sc_comp_stream.next_out = vnd->sc_comp_decombuf; 1772 vnd->sc_comp_stream.avail_out = vnd->sc_comp_blksz; 1773 inflateReset(&vnd->sc_comp_stream); 1774 error = inflate(&vnd->sc_comp_stream, Z_FINISH); 1775 if (error != Z_STREAM_END) { 1776 if (vnd->sc_comp_stream.msg) 1777 aprint_normal_dev(vnd->sc_dev, 1778 "compressed file, %s\n", 1779 vnd->sc_comp_stream.msg); 1780 bp->b_error = EBADMSG; 1781 VOP_UNLOCK(vnd->sc_vp, 0); 1782 splx(s); 1783 return; 1784 } 1785 vnd->sc_comp_buffblk = comp_block; 1786 VOP_UNLOCK(vnd->sc_vp, 0); 1787 } 1788 1789 /* transfer the usable uncompressed data */ 1790 offset_in_buffer = bn % (off_t)vnd->sc_comp_blksz; 1791 length_in_buffer = vnd->sc_comp_blksz - offset_in_buffer; 1792 if (length_in_buffer > bp->b_resid) 1793 length_in_buffer = bp->b_resid; 1794 auio.uio_iov = &aiov; 1795 auio.uio_iovcnt = 1; 1796 aiov.iov_base = addr; 1797 aiov.iov_len = length_in_buffer; 1798 auio.uio_resid = aiov.iov_len; 1799 auio.uio_offset = 0; 1800 error = uiomove(vnd->sc_comp_decombuf + offset_in_buffer, 1801 length_in_buffer, &auio); 1802 if (error) { 1803 bp->b_error = error; 1804 splx(s); 1805 return; 1806 } 1807 1808 bn += length_in_buffer; 1809 addr += length_in_buffer; 1810 bp->b_resid -= length_in_buffer; 1811 } 1812 splx(s); 1813 } 1814 1815 /* compression memory allocation routines */ 1816 static void * 1817 vnd_alloc(void *aux, u_int items, u_int siz) 1818 { 1819 return malloc(items * siz, M_TEMP, M_NOWAIT); 1820 } 1821 1822 static void 1823 vnd_free(void *aux, void *ptr) 1824 { 1825 free(ptr, M_TEMP); 1826 } 1827 #endif /* VND_COMPRESSION */ 1828 1829 static void 1830 vnd_set_properties(struct vnd_softc *vnd) 1831 { 1832 prop_dictionary_t disk_info, odisk_info, geom; 1833 1834 disk_info = prop_dictionary_create(); 1835 1836 geom = prop_dictionary_create(); 1837 1838 prop_dictionary_set_uint64(geom, "sectors-per-unit", 1839 vnd->sc_geom.vng_nsectors * vnd->sc_geom.vng_ntracks * 1840 vnd->sc_geom.vng_ncylinders); 1841 1842 prop_dictionary_set_uint32(geom, "sector-size", 1843 vnd->sc_geom.vng_secsize); 1844 1845 prop_dictionary_set_uint16(geom, "sectors-per-track", 1846 vnd->sc_geom.vng_nsectors); 1847 1848 prop_dictionary_set_uint16(geom, "tracks-per-cylinder", 1849 vnd->sc_geom.vng_ntracks); 1850 1851 prop_dictionary_set_uint64(geom, "cylinders-per-unit", 1852 vnd->sc_geom.vng_ncylinders); 1853 1854 prop_dictionary_set(disk_info, "geometry", geom); 1855 prop_object_release(geom); 1856 1857 prop_dictionary_set(device_properties(vnd->sc_dev), 1858 "disk-info", disk_info); 1859 1860 /* 1861 * Don't release disk_info here; we keep a reference to it. 1862 * disk_detach() will release it when we go away. 1863 */ 1864 1865 odisk_info = vnd->sc_dkdev.dk_info; 1866 vnd->sc_dkdev.dk_info = disk_info; 1867 if (odisk_info) 1868 prop_object_release(odisk_info); 1869 } 1870