/*	$NetBSD: vnd.c,v 1.190 2009/01/11 02:45:50 christos Exp $	*/

/*-
 * Copyright (c) 1996, 1997, 1998, 2008 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vn.c 1.13 94/04/02$
 *
 *	@(#)vn.c	8.9 (Berkeley) 5/14/95
 */

/*
 * Copyright (c) 1988 University of Utah.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by the University of
 *      California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vn.c 1.13 94/04/02$
 *
 *	@(#)vn.c	8.9 (Berkeley) 5/14/95
 */

/*
 * Vnode disk driver.
 *
 * Block/character interface to a vnode.  Allows one to treat a file
 * as a disk (e.g. build a filesystem in it, mount it, etc.).
 *
 * NOTE 1: If the vnode supports the VOP_BMAP and VOP_STRATEGY operations,
 * this uses them to avoid distorting the local buffer cache.  If those
 * block-level operations are not available, this falls back to the regular
 * read and write calls.  Using these may distort the cache in some cases,
 * but it is better to have the driver working than to prevent it from
 * working on file systems where the block-level operations are not
 * implemented for whatever reason.
 *
 * NOTE 2: There is a security issue involved with this driver.
 * Once mounted, all access to the contents of the "mapped" file via
 * the special file is controlled by the permissions on the special
 * file; the protection of the mapped file is ignored (effectively,
 * by using root credentials in all transactions).
 *
 * NOTE 3: Doesn't interact with leases, should it?
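 *
 * A typical usage sketch, for orientation only (via the vnconfig(8)
 * utility; exact flags may differ between releases):
 *
 *	vnconfig vnd0 /path/to/disk.img		# map the file onto vnd0
 *	mount /dev/vnd0a /mnt			# use it like any other disk
 *	vnconfig -u vnd0			# unconfigure when done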
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vnd.c,v 1.190 2009/01/11 02:45:50 christos Exp $");

#if defined(_KERNEL_OPT)
#include "fs_nfs.h"
#include "opt_vnd.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/proc.h>
#include <sys/kthread.h>
#include <sys/errno.h>
#include <sys/buf.h>
#include <sys/bufq.h>
#include <sys/malloc.h>
#include <sys/ioctl.h>
#include <sys/disklabel.h>
#include <sys/device.h>
#include <sys/disk.h>
#include <sys/stat.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/uio.h>
#include <sys/conf.h>
#include <sys/kauth.h>

#include <net/zlib.h>

#include <miscfs/genfs/genfs.h>
#include <miscfs/specfs/specdev.h>

#include <dev/vndvar.h>

#include <prop/proplib.h>

#if defined(VNDDEBUG) && !defined(DEBUG)
#define DEBUG
#endif

#ifdef DEBUG
int dovndcluster = 1;
#define VDB_FOLLOW	0x01
#define VDB_INIT	0x02
#define VDB_IO		0x04
#define VDB_LABEL	0x08
int vnddebug = 0x00;
#endif

#define vndunit(x)	DISKUNIT(x)

struct vndxfer {
	struct buf vx_buf;
	struct vnd_softc *vx_vnd;
};
#define VND_BUFTOXFER(bp)	((struct vndxfer *)(void *)bp)

#define VND_GETXFER(vnd)	pool_get(&(vnd)->sc_vxpool, PR_WAITOK)
#define VND_PUTXFER(vnd, vx)	pool_put(&(vnd)->sc_vxpool, (vx))

#define VNDLABELDEV(dev) \
	(MAKEDISKDEV(major((dev)), vndunit((dev)), RAW_PART))

/* called by main() at boot time */
void	vndattach(int);

static void	vndclear(struct vnd_softc *, int);
static int	vndsetcred(struct vnd_softc *, kauth_cred_t);
static void	vndthrottle(struct vnd_softc *, struct vnode *);
static void	vndiodone(struct buf *);
#if 0
static void	vndshutdown(void);
#endif

static void	vndgetdefaultlabel(struct vnd_softc *, struct disklabel *);
static void	vndgetdisklabel(dev_t, struct vnd_softc *);

static int	vndlock(struct vnd_softc *);
static void	vndunlock(struct vnd_softc *);
#ifdef VND_COMPRESSION
static void	compstrategy(struct buf *, off_t);
static void	*vnd_alloc(void *, u_int, u_int);
static void	vnd_free(void *, void *);
#endif /* VND_COMPRESSION */

static void	vndthread(void *);
static bool	vnode_has_op(const struct vnode *, int);
static void	handle_with_rdwr(struct vnd_softc *, const struct buf *,
		    struct buf *);
static void	handle_with_strategy(struct vnd_softc *, const struct buf *,
		    struct buf *);
static void	vnd_set_properties(struct vnd_softc *);

static dev_type_open(vndopen);
static dev_type_close(vndclose);
static dev_type_read(vndread);
static dev_type_write(vndwrite);
static dev_type_ioctl(vndioctl);
static dev_type_strategy(vndstrategy);
static dev_type_dump(vnddump);
static dev_type_size(vndsize);

const struct bdevsw vnd_bdevsw = {
	vndopen, vndclose, vndstrategy, vndioctl, vnddump, vndsize, D_DISK
};

const struct cdevsw vnd_cdevsw = {
	vndopen, vndclose, vndread, vndwrite, vndioctl,
	nostop, notty, nopoll, nommap, nokqfilter, D_DISK
};

static int	vnd_match(device_t, cfdata_t, void *);
static void	vnd_attach(device_t, device_t, void *);
static int	vnd_detach(device_t, int);

CFATTACH_DECL_NEW(vnd, sizeof(struct vnd_softc),
    vnd_match, vnd_attach, vnd_detach, NULL);
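
/*
 * vnd is a pseudo-device: instances are not found by probing hardware but
 * are created on demand (see vnd_spawn(), which attaches a new unit via
 * config_attach_pseudo() the first time it is opened).
 */
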
extern struct cfdriver vnd_cd;

static struct vnd_softc	*vnd_spawn(int);
int	vnd_destroy(device_t);

void
vndattach(int num)
{
	int error;

	error = config_cfattach_attach(vnd_cd.cd_name, &vnd_ca);
	if (error)
		aprint_error("%s: unable to register cfattach\n",
		    vnd_cd.cd_name);
}

static int
vnd_match(device_t self, cfdata_t cfdata, void *aux)
{

	return 1;
}

static void
vnd_attach(device_t parent, device_t self, void *aux)
{
	struct vnd_softc *sc = device_private(self);

	sc->sc_dev = self;
	sc->sc_comp_offsets = NULL;
	sc->sc_comp_buff = NULL;
	sc->sc_comp_decombuf = NULL;
	bufq_alloc(&sc->sc_tab, "disksort", BUFQ_SORT_RAWBLOCK);
	disk_init(&sc->sc_dkdev, device_xname(self), NULL);
	if (!pmf_device_register(self, NULL, NULL))
		aprint_error_dev(self, "couldn't establish power handler\n");
}

static int
vnd_detach(device_t self, int flags)
{
	struct vnd_softc *sc = device_private(self);
	if (sc->sc_flags & VNF_INITED)
		return EBUSY;

	pmf_device_deregister(self);
	bufq_free(sc->sc_tab);
	disk_destroy(&sc->sc_dkdev);

	return 0;
}

static struct vnd_softc *
vnd_spawn(int unit)
{
	struct cfdata *cf;

	cf = malloc(sizeof(*cf), M_DEVBUF, M_WAITOK);
	cf->cf_name = vnd_cd.cd_name;
	cf->cf_atname = vnd_cd.cd_name;
	cf->cf_unit = unit;
	cf->cf_fstate = FSTATE_STAR;

	return device_private(config_attach_pseudo(cf));
}

int
vnd_destroy(device_t dev)
{
	int error;
	cfdata_t cf;

	cf = device_cfdata(dev);
	error = config_detach(dev, DETACH_QUIET);
	if (error)
		return error;
	free(cf, M_DEVBUF);
	return 0;
}

static int
vndopen(dev_t dev, int flags, int mode, struct lwp *l)
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;
	int error = 0, part, pmask;
	struct disklabel *lp;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndopen(0x%llx, 0x%x, 0x%x, %p)\n", dev, flags, mode, l);
#endif
	sc = device_lookup_private(&vnd_cd, unit);
	if (sc == NULL) {
		sc = vnd_spawn(unit);
		if (sc == NULL)
			return ENOMEM;
	}

	if ((error = vndlock(sc)) != 0)
		return (error);

	lp = sc->sc_dkdev.dk_label;

	part = DISKPART(dev);
	pmask = (1 << part);

	/*
	 * If we're initialized, check to see if there are any other
	 * open partitions.  If not, then it's safe to update the
	 * in-core disklabel.  Only read the disklabel if it is
	 * not already valid.
	 */
	if ((sc->sc_flags & (VNF_INITED|VNF_VLABEL)) == VNF_INITED &&
	    sc->sc_dkdev.dk_openmask == 0)
		vndgetdisklabel(dev, sc);

	/* Check that the partition exists. */
	if (part != RAW_PART) {
		if (((sc->sc_flags & VNF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
		    (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			goto done;
		}
	}

	/* Prevent our unit from being unconfigured while open. */
	switch (mode) {
	case S_IFCHR:
		sc->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		sc->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}
	sc->sc_dkdev.dk_openmask =
	    sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask;

done:
	vndunlock(sc);
	return (error);
}

static int
vndclose(dev_t dev, int flags, int mode, struct lwp *l)
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;
	int error = 0, part;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndclose(0x%llx, 0x%x, 0x%x, %p)\n", dev, flags, mode, l);
#endif
	sc = device_lookup_private(&vnd_cd, unit);
	if (sc == NULL)
		return ENXIO;

	if ((error = vndlock(sc)) != 0)
		return (error);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (mode) {
	case S_IFCHR:
		sc->sc_dkdev.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		sc->sc_dkdev.dk_bopenmask &= ~(1 << part);
		break;
	}
	sc->sc_dkdev.dk_openmask =
	    sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask;

	vndunlock(sc);

	if ((sc->sc_flags & VNF_INITED) == 0) {
		if ((error = vnd_destroy(sc->sc_dev)) != 0) {
			aprint_error_dev(sc->sc_dev,
			    "unable to detach instance\n");
			return error;
		}
	}

	return (0);
}

/*
 * Queue the request, and wakeup the kernel thread to handle it.
 */
static void
vndstrategy(struct buf *bp)
{
	int unit = vndunit(bp->b_dev);
	struct vnd_softc *vnd =
	    device_lookup_private(&vnd_cd, unit);
	struct disklabel *lp;
	daddr_t blkno;
	int s = splbio();

	if (vnd == NULL) {
		bp->b_error = ENXIO;
		goto done;
	}
	lp = vnd->sc_dkdev.dk_label;

	if ((vnd->sc_flags & VNF_INITED) == 0) {
		bp->b_error = ENXIO;
		goto done;
	}

	/*
	 * The transfer must be a whole number of blocks.
	 */
	if ((bp->b_bcount % lp->d_secsize) != 0) {
		bp->b_error = EINVAL;
		goto done;
	}

	/*
	 * check if we're read-only.
	 */
	if ((vnd->sc_flags & VNF_READONLY) && !(bp->b_flags & B_READ)) {
		bp->b_error = EACCES;
		goto done;
	}

	/* If it's a nil transfer, wake up the top half now. */
	if (bp->b_bcount == 0) {
		goto done;
	}

	/*
	 * Do bounds checking and adjust transfer.  If there's an error,
	 * the bounds check will flag that for us.
	 */
	if (DISKPART(bp->b_dev) == RAW_PART) {
		if (bounds_check_with_mediasize(bp, DEV_BSIZE,
		    vnd->sc_size) <= 0)
			goto done;
	} else {
		if (bounds_check_with_label(&vnd->sc_dkdev,
		    bp, vnd->sc_flags & (VNF_WLABEL|VNF_LABELLING)) <= 0)
			goto done;
	}

	/*
	 * Put the block number in terms of the logical blocksize
	 * of the "device".
	 */

	blkno = bp->b_blkno / (lp->d_secsize / DEV_BSIZE);

	/*
	 * Translate the partition-relative block number to an absolute.
	 */
	if (DISKPART(bp->b_dev) != RAW_PART) {
		struct partition *pp;

		pp = &vnd->sc_dkdev.dk_label->d_partitions[
		    DISKPART(bp->b_dev)];
		blkno += pp->p_offset;
	}
	bp->b_rawblkno = blkno;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndstrategy(%p): unit %d\n", bp, unit);
#endif
	BUFQ_PUT(vnd->sc_tab, bp);
	wakeup(&vnd->sc_tab);
	splx(s);
	return;

done:
	bp->b_resid = bp->b_bcount;
	biodone(bp);
	splx(s);
}

static bool
vnode_has_strategy(struct vnd_softc *vnd)
{
	return vnode_has_op(vnd->sc_vp, VOFFSET(vop_bmap)) &&
	    vnode_has_op(vnd->sc_vp, VOFFSET(vop_strategy));
}

/* XXX this function needs a reliable check to detect
 * sparse files.  Otherwise, bmap/strategy may be used
 * and fail on non-allocated blocks.  VOP_READ/VOP_WRITE
 * works on sparse files.
 */
#if notyet
static bool
vnode_strategy_probe(struct vnd_softc *vnd)
{
	int error;
	daddr_t nbn;

	if (!vnode_has_strategy(vnd))
		return false;

	/* Convert the first logical block number to its
	 * physical block number.
	 */
	error = 0;
	vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY | LK_CANRECURSE);
	error = VOP_BMAP(vnd->sc_vp, 0, NULL, &nbn, NULL);
	VOP_UNLOCK(vnd->sc_vp, 0);

	/* Test if that worked. */
	if (error == 0 && (long)nbn == -1)
		return false;

	return true;
}
#endif

static void
vndthread(void *arg)
{
	struct vnd_softc *vnd = arg;
	bool usestrategy;
	int s;

	/* Determine whether we can *use* VOP_BMAP and VOP_STRATEGY to
	 * directly access the backing vnode.  If we can, use these two
	 * operations to avoid messing with the local buffer cache.
	 * Otherwise fall back to regular VOP_READ/VOP_WRITE operations
	 * which are guaranteed to work with any file system. */
	usestrategy = vnode_has_strategy(vnd);

#ifdef DEBUG
	if (vnddebug & VDB_INIT)
		printf("vndthread: vp %p, %s\n", vnd->sc_vp,
		    usestrategy ?
		    "using bmap/strategy operations" :
		    "using read/write operations");
#endif

	s = splbio();
	vnd->sc_flags |= VNF_KTHREAD;
	wakeup(&vnd->sc_kthread);

	/*
	 * Dequeue requests and serve them depending on the available
	 * vnode operations.
	 */
	while ((vnd->sc_flags & VNF_VUNCONF) == 0) {
		struct vndxfer *vnx;
		int flags;
		struct buf *obp;
		struct buf *bp;

		obp = BUFQ_GET(vnd->sc_tab);
		if (obp == NULL) {
			tsleep(&vnd->sc_tab, PRIBIO, "vndbp", 0);
			continue;
		}
		splx(s);
		flags = obp->b_flags;
#ifdef DEBUG
		if (vnddebug & VDB_FOLLOW)
			printf("vndthread(%p)\n", obp);
#endif

		if (vnd->sc_vp->v_mount == NULL) {
			obp->b_error = ENXIO;
			goto done;
		}
#ifdef VND_COMPRESSION
		/* handle a compressed read */
		if ((flags & B_READ) != 0 && (vnd->sc_flags & VNF_COMP)) {
			off_t bn;

			/* Convert to a byte offset within the file. */
			bn = obp->b_rawblkno *
			    vnd->sc_dkdev.dk_label->d_secsize;

			compstrategy(obp, bn);
			goto done;
		}
#endif /* VND_COMPRESSION */

		/*
		 * Allocate a header for this transfer and link it to the
		 * buffer
		 */
		s = splbio();
		vnx = VND_GETXFER(vnd);
		splx(s);
		vnx->vx_vnd = vnd;

		s = splbio();
		while (vnd->sc_active >= vnd->sc_maxactive) {
			tsleep(&vnd->sc_tab, PRIBIO, "vndac", 0);
		}
		vnd->sc_active++;
		splx(s);

		/* Instrumentation. */
		disk_busy(&vnd->sc_dkdev);

		bp = &vnx->vx_buf;
		buf_init(bp);
		bp->b_flags = (obp->b_flags & B_READ);
		bp->b_oflags = obp->b_oflags;
		bp->b_cflags = obp->b_cflags;
		bp->b_iodone = vndiodone;
		bp->b_private = obp;
		bp->b_vp = vnd->sc_vp;
		bp->b_objlock = &bp->b_vp->v_interlock;
		bp->b_data = obp->b_data;
		bp->b_bcount = obp->b_bcount;
		BIO_COPYPRIO(bp, obp);

		/* Handle the request using the appropriate operations. */
		if (usestrategy)
			handle_with_strategy(vnd, obp, bp);
		else
			handle_with_rdwr(vnd, obp, bp);

		s = splbio();
		continue;

done:
		biodone(obp);
		s = splbio();
	}

	vnd->sc_flags &= (~VNF_KTHREAD | VNF_VUNCONF);
	wakeup(&vnd->sc_kthread);
	splx(s);
	kthread_exit(0);
}

/*
 * Checks if the given vnode supports the requested operation.
 * The operation is specified by the offset returned by VOFFSET.
 *
 * XXX The test used below to determine this is quite fragile
 * because it relies on the file system to use genfs to specify
 * unimplemented operations.  There might be another way to do
 * it more cleanly.
 */
static bool
vnode_has_op(const struct vnode *vp, int opoffset)
{
	int (*defaultp)(void *);
	int (*opp)(void *);

	defaultp = vp->v_op[VOFFSET(vop_default)];
	opp = vp->v_op[opoffset];

	return opp != defaultp && opp != genfs_eopnotsupp &&
	    opp != genfs_badop && opp != genfs_nullop;
}

/*
 * Handles the read/write request given in 'bp' using the vnode's VOP_READ
 * and VOP_WRITE operations.
 *
 * 'obp' is a pointer to the original request fed to the vnd device.
 */
static void
handle_with_rdwr(struct vnd_softc *vnd, const struct buf *obp, struct buf *bp)
{
	bool doread;
	off_t offset;
	size_t resid;
	struct vnode *vp;

	doread = bp->b_flags & B_READ;
	offset = obp->b_rawblkno * vnd->sc_dkdev.dk_label->d_secsize;
	vp = vnd->sc_vp;

#if defined(DEBUG)
	if (vnddebug & VDB_IO)
		printf("vnd (rdwr): vp %p, %s, rawblkno 0x%" PRIx64
		    ", secsize %d, offset %" PRIu64
		    ", bcount %d\n",
		    vp, doread ? "read" : "write", obp->b_rawblkno,
		    vnd->sc_dkdev.dk_label->d_secsize, offset,
		    bp->b_bcount);
#endif

	/* Issue the read or write operation. */
	bp->b_error =
	    vn_rdwr(doread ? UIO_READ : UIO_WRITE,
	    vp, bp->b_data, bp->b_bcount, offset,
	    UIO_SYSSPACE, 0, vnd->sc_cred, &resid, NULL);
	bp->b_resid = resid;

	/* We need to increase the number of outputs on the vnode if
	 * there was any write to it. */
	if (!doread) {
		mutex_enter(&vp->v_interlock);
		vp->v_numoutput++;
		mutex_exit(&vp->v_interlock);
	}

	biodone(bp);
}

/*
 * Handles the read/write request given in 'bp' using the vnode's VOP_BMAP
 * and VOP_STRATEGY operations.
 *
 * 'obp' is a pointer to the original request fed to the vnd device.
 */
static void
handle_with_strategy(struct vnd_softc *vnd, const struct buf *obp,
    struct buf *bp)
{
	int bsize, error, flags, skipped;
	size_t resid, sz;
	off_t bn, offset;
	struct vnode *vp;

	flags = obp->b_flags;

	if (!(flags & B_READ)) {
		vp = bp->b_vp;
		mutex_enter(&vp->v_interlock);
		vp->v_numoutput++;
		mutex_exit(&vp->v_interlock);
	}

	/* convert to a byte offset within the file. */
	bn = obp->b_rawblkno * vnd->sc_dkdev.dk_label->d_secsize;

	bsize = vnd->sc_vp->v_mount->mnt_stat.f_iosize;
	skipped = 0;

	/*
	 * Break the request into bsize pieces and feed them
	 * sequentially using VOP_BMAP/VOP_STRATEGY.
	 * We do it this way to keep from flooding NFS servers if we
	 * are connected to an NFS file.  This places the burden on
	 * the client rather than the server.
	 */
	error = 0;
	bp->b_resid = bp->b_bcount;
	for (offset = 0, resid = bp->b_resid; resid;
	    resid -= sz, offset += sz) {
		struct buf *nbp;
		daddr_t nbn;
		int off, nra;

		nra = 0;
		vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY | LK_CANRECURSE);
		error = VOP_BMAP(vnd->sc_vp, bn / bsize, &vp, &nbn, &nra);
		VOP_UNLOCK(vnd->sc_vp, 0);

		if (error == 0 && (long)nbn == -1)
			error = EIO;

		/*
		 * If there was an error or a hole in the file...punt.
		 * Note that we may have to wait for any operations
		 * that we have already fired off before releasing
		 * the buffer.
		 *
		 * XXX we could deal with holes here but it would be
		 * a hassle (in the write case).
		 */
		if (error) {
			skipped += resid;
			break;
		}

#ifdef DEBUG
		if (!dovndcluster)
			nra = 0;
#endif

		off = bn % bsize;
		sz = MIN(((off_t)1 + nra) * bsize - off, resid);
#ifdef DEBUG
		if (vnddebug & VDB_IO)
			printf("vndstrategy: vp %p/%p bn 0x%qx/0x%" PRIx64
			    " sz 0x%zx\n", vnd->sc_vp, vp, (long long)bn,
			    nbn, sz);
#endif

		nbp = getiobuf(vp, true);
		nestiobuf_setup(bp, nbp, offset, sz);
		nbp->b_blkno = nbn + btodb(off);

#if 0 /* XXX #ifdef DEBUG */
		if (vnddebug & VDB_IO)
			printf("vndstart(%ld): bp %p vp %p blkno "
			    "0x%" PRIx64 " flags %x addr %p cnt 0x%x\n",
			    (long) (vnd-vnd_softc), &nbp->vb_buf,
			    nbp->vb_buf.b_vp, nbp->vb_buf.b_blkno,
			    nbp->vb_buf.b_flags, nbp->vb_buf.b_data,
			    nbp->vb_buf.b_bcount);
#endif
		VOP_STRATEGY(vp, nbp);
		bn += sz;
	}
	nestiobuf_done(bp, skipped, error);
}

static void
vndiodone(struct buf *bp)
{
	struct vndxfer *vnx = VND_BUFTOXFER(bp);
	struct vnd_softc *vnd = vnx->vx_vnd;
	struct buf *obp = bp->b_private;

	KASSERT(&vnx->vx_buf == bp);
	KASSERT(vnd->sc_active > 0);
#ifdef DEBUG
	if (vnddebug & VDB_IO) {
		printf("vndiodone1: bp %p iodone: error %d\n",
		    bp, bp->b_error);
	}
#endif
	disk_unbusy(&vnd->sc_dkdev, bp->b_bcount - bp->b_resid,
	    (bp->b_flags & B_READ));
	vnd->sc_active--;
	if (vnd->sc_active == 0) {
		wakeup(&vnd->sc_tab);
	}
	obp->b_error = bp->b_error;
	obp->b_resid = bp->b_resid;
	buf_destroy(bp);
	VND_PUTXFER(vnd, vnx);
	biodone(obp);
}

/* ARGSUSED */
static int
vndread(dev_t dev, struct uio *uio, int flags)
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndread(0x%llx, %p)\n", dev, uio);
#endif

	sc = device_lookup_private(&vnd_cd, unit);
	if (sc == NULL)
		return ENXIO;

	if ((sc->sc_flags & VNF_INITED) == 0)
		return (ENXIO);

	return (physio(vndstrategy, NULL, dev, B_READ, minphys, uio));
}

/* ARGSUSED */
static int
vndwrite(dev_t dev, struct uio *uio, int flags)
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndwrite(0x%llx, %p)\n", dev, uio);
#endif

	sc = device_lookup_private(&vnd_cd, unit);
	if (sc == NULL)
		return ENXIO;

	if ((sc->sc_flags & VNF_INITED) == 0)
		return (ENXIO);

	return (physio(vndstrategy, NULL, dev, B_WRITE, minphys, uio));
}

static int
vnd_cget(struct lwp *l, int unit, int *un, struct vattr *va)
{
	struct vnd_softc *vnd;

	if (*un == -1)
		*un = unit;
	if (*un < 0)
		return EINVAL;

	vnd = device_lookup_private(&vnd_cd, *un);
	if (vnd == NULL)
		return (*un >= vnd_cd.cd_ndevs) ? ENXIO : -1;

	if ((vnd->sc_flags & VNF_INITED) == 0)
		return -1;

	return VOP_GETATTR(vnd->sc_vp, va, l->l_cred);
}

/* ARGSUSED */
static int
vndioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
{
	int unit = vndunit(dev);
	struct vnd_softc *vnd;
	struct vnd_ioctl *vio;
	struct vattr vattr;
	struct nameidata nd;
	int error, part, pmask;
	size_t geomsize;
	int fflags;
#ifdef __HAVE_OLD_DISKLABEL
	struct disklabel newlabel;
#endif

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndioctl(0x%llx, 0x%lx, %p, 0x%x, %p): unit %d\n",
		    dev, cmd, data, flag, l->l_proc, unit);
#endif
	vnd = device_lookup_private(&vnd_cd, unit);
	if (vnd == NULL &&
#ifdef COMPAT_30
	    cmd != VNDIOOCGET &&
#endif
	    cmd != VNDIOCGET)
		return ENXIO;
	vio = (struct vnd_ioctl *)data;

	/* Must be open for writes for these commands... */
	switch (cmd) {
	case VNDIOCSET:
	case VNDIOCCLR:
	case DIOCSDINFO:
	case DIOCWDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCSDINFO:
	case ODIOCWDINFO:
#endif
	case DIOCKLABEL:
	case DIOCWLABEL:
		if ((flag & FWRITE) == 0)
			return (EBADF);
	}

	/* Must be initialized for these... */
	switch (cmd) {
	case VNDIOCCLR:
	case DIOCGDINFO:
	case DIOCSDINFO:
	case DIOCWDINFO:
	case DIOCGPART:
	case DIOCKLABEL:
	case DIOCWLABEL:
	case DIOCGDEFLABEL:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDINFO:
	case ODIOCSDINFO:
	case ODIOCWDINFO:
	case ODIOCGDEFLABEL:
#endif
		if ((vnd->sc_flags & VNF_INITED) == 0)
			return (ENXIO);
	}

	switch (cmd) {
	case VNDIOCSET:
		if (vnd->sc_flags & VNF_INITED)
			return (EBUSY);

		if ((error = vndlock(vnd)) != 0)
			return (error);

		fflags = FREAD;
		if ((vio->vnd_flags & VNDIOF_READONLY) == 0)
			fflags |= FWRITE;
		NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, vio->vnd_file);
		if ((error = vn_open(&nd, fflags, 0)) != 0)
			goto unlock_and_exit;
		KASSERT(l);
		error = VOP_GETATTR(nd.ni_vp, &vattr, l->l_cred);
		if (!error && nd.ni_vp->v_type != VREG)
			error = EOPNOTSUPP;
		if (error) {
			VOP_UNLOCK(nd.ni_vp, 0);
			goto close_and_exit;
		}

		/* If using a compressed file, initialize its info */
		/* (or abort with an error if kernel has no compression) */
		if (vio->vnd_flags & VNF_COMP) {
#ifdef VND_COMPRESSION
			struct vnd_comp_header *ch;
			int i;
			u_int32_t comp_size;
			u_int32_t comp_maxsize;

			/* allocate space for compressed file header */
			ch = malloc(sizeof(struct vnd_comp_header),
			    M_TEMP, M_WAITOK);

			/* read compressed file header */
			error = vn_rdwr(UIO_READ, nd.ni_vp, (void *)ch,
			    sizeof(struct vnd_comp_header), 0, UIO_SYSSPACE,
			    IO_UNIT|IO_NODELOCKED, l->l_cred, NULL, NULL);
			if (error) {
				free(ch, M_TEMP);
				VOP_UNLOCK(nd.ni_vp, 0);
				goto close_and_exit;
			}

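			/*
			 * As implied by the code below, a compressed
			 * (cloop-style) image starts with a vnd_comp_header
			 * giving the uncompressed block size and the number
			 * of blocks, followed by num_blocks + 1 64-bit file
			 * offsets delimiting the zlib-compressed blocks; all
			 * of these fields are stored big-endian.
			 */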
			/* save some header info */
			vnd->sc_comp_blksz = ntohl(ch->block_size);
			/* note last offset is the file byte size */
			vnd->sc_comp_numoffs = ntohl(ch->num_blocks)+1;
			free(ch, M_TEMP);
			if (vnd->sc_comp_blksz == 0 ||
			    vnd->sc_comp_blksz % DEV_BSIZE != 0) {
				VOP_UNLOCK(nd.ni_vp, 0);
				error = EINVAL;
				goto close_and_exit;
			}
			if (sizeof(struct vnd_comp_header) +
			    sizeof(u_int64_t) * vnd->sc_comp_numoffs >
			    vattr.va_size) {
				VOP_UNLOCK(nd.ni_vp, 0);
				error = EINVAL;
				goto close_and_exit;
			}

			/* set decompressed file size */
			vattr.va_size =
			    ((u_quad_t)vnd->sc_comp_numoffs - 1) *
			    (u_quad_t)vnd->sc_comp_blksz;

			/* allocate space for all the compressed offsets */
			vnd->sc_comp_offsets =
			    malloc(sizeof(u_int64_t) * vnd->sc_comp_numoffs,
			    M_DEVBUF, M_WAITOK);

			/* read in the offsets */
			error = vn_rdwr(UIO_READ, nd.ni_vp,
			    (void *)vnd->sc_comp_offsets,
			    sizeof(u_int64_t) * vnd->sc_comp_numoffs,
			    sizeof(struct vnd_comp_header), UIO_SYSSPACE,
			    IO_UNIT|IO_NODELOCKED, l->l_cred, NULL, NULL);
			if (error) {
				VOP_UNLOCK(nd.ni_vp, 0);
				goto close_and_exit;
			}
			/*
			 * find largest block size (used for allocation limit).
			 * Also convert offset to native byte order.
			 */
			comp_maxsize = 0;
			for (i = 0; i < vnd->sc_comp_numoffs - 1; i++) {
				vnd->sc_comp_offsets[i] =
				    be64toh(vnd->sc_comp_offsets[i]);
				comp_size = be64toh(vnd->sc_comp_offsets[i + 1])
				    - vnd->sc_comp_offsets[i];
				if (comp_size > comp_maxsize)
					comp_maxsize = comp_size;
			}
			vnd->sc_comp_offsets[vnd->sc_comp_numoffs - 1] =
			    be64toh(vnd->sc_comp_offsets[vnd->sc_comp_numoffs - 1]);

			/* create compressed data buffer */
			vnd->sc_comp_buff = malloc(comp_maxsize,
			    M_DEVBUF, M_WAITOK);

			/* create decompressed buffer */
			vnd->sc_comp_decombuf = malloc(vnd->sc_comp_blksz,
			    M_DEVBUF, M_WAITOK);
			vnd->sc_comp_buffblk = -1;

			/* Initialize decompress stream */
			bzero(&vnd->sc_comp_stream, sizeof(z_stream));
			vnd->sc_comp_stream.zalloc = vnd_alloc;
			vnd->sc_comp_stream.zfree = vnd_free;
			error = inflateInit2(&vnd->sc_comp_stream, MAX_WBITS);
			if (error) {
				if (vnd->sc_comp_stream.msg)
					printf("vnd%d: compressed file, %s\n",
					    unit, vnd->sc_comp_stream.msg);
				VOP_UNLOCK(nd.ni_vp, 0);
				error = EINVAL;
				goto close_and_exit;
			}

			vnd->sc_flags |= VNF_COMP | VNF_READONLY;
#else /* !VND_COMPRESSION */
			VOP_UNLOCK(nd.ni_vp, 0);
			error = EOPNOTSUPP;
			goto close_and_exit;
#endif /* VND_COMPRESSION */
		}

		VOP_UNLOCK(nd.ni_vp, 0);
		vnd->sc_vp = nd.ni_vp;
		vnd->sc_size = btodb(vattr.va_size);	/* note truncation */

		/*
		 * Use pseudo-geometry specified.  If none was provided,
		 * use "standard" Adaptec fictitious geometry.
		 */
		if (vio->vnd_flags & VNDIOF_HASGEOM) {

			memcpy(&vnd->sc_geom, &vio->vnd_geom,
			    sizeof(vio->vnd_geom));

			/*
			 * Sanity-check the sector size.
			 * XXX Don't allow secsize < DEV_BSIZE.  Should
			 * XXX we?
			 */
			if (vnd->sc_geom.vng_secsize < DEV_BSIZE ||
			    (vnd->sc_geom.vng_secsize % DEV_BSIZE) != 0 ||
			    vnd->sc_geom.vng_ncylinders == 0 ||
			    (vnd->sc_geom.vng_ntracks *
			    vnd->sc_geom.vng_nsectors) == 0) {
				error = EINVAL;
				goto close_and_exit;
			}

			/*
			 * Compute the size (in DEV_BSIZE blocks) specified
			 * by the geometry.
			 */
			geomsize = (vnd->sc_geom.vng_nsectors *
			    vnd->sc_geom.vng_ntracks *
			    vnd->sc_geom.vng_ncylinders) *
			    (vnd->sc_geom.vng_secsize / DEV_BSIZE);

			/*
			 * Sanity-check the size against the specified
			 * geometry.
			 */
			if (vnd->sc_size < geomsize) {
				error = EINVAL;
				goto close_and_exit;
			}
		} else if (vnd->sc_size >= (32 * 64)) {
			/*
			 * Size must be at least 2048 DEV_BSIZE blocks
			 * (1M) in order to use this geometry.
			 */
			vnd->sc_geom.vng_secsize = DEV_BSIZE;
			vnd->sc_geom.vng_nsectors = 32;
			vnd->sc_geom.vng_ntracks = 64;
			vnd->sc_geom.vng_ncylinders = vnd->sc_size / (64 * 32);
		} else {
			vnd->sc_geom.vng_secsize = DEV_BSIZE;
			vnd->sc_geom.vng_nsectors = 1;
			vnd->sc_geom.vng_ntracks = 1;
			vnd->sc_geom.vng_ncylinders = vnd->sc_size;
		}

		vnd_set_properties(vnd);

		if (vio->vnd_flags & VNDIOF_READONLY) {
			vnd->sc_flags |= VNF_READONLY;
		}

		if ((error = vndsetcred(vnd, l->l_cred)) != 0)
			goto close_and_exit;

		vndthrottle(vnd, vnd->sc_vp);
		vio->vnd_size = dbtob(vnd->sc_size);
		vnd->sc_flags |= VNF_INITED;

		/* create the kernel thread, wait for it to be up */
		error = kthread_create(PRI_NONE, 0, NULL, vndthread, vnd,
		    &vnd->sc_kthread, device_xname(vnd->sc_dev));
		if (error)
			goto close_and_exit;
		while ((vnd->sc_flags & VNF_KTHREAD) == 0) {
			tsleep(&vnd->sc_kthread, PRIBIO, "vndthr", 0);
		}
#ifdef DEBUG
		if (vnddebug & VDB_INIT)
			printf("vndioctl: SET vp %p size 0x%lx %d/%d/%d/%d\n",
			    vnd->sc_vp, (unsigned long) vnd->sc_size,
			    vnd->sc_geom.vng_secsize,
			    vnd->sc_geom.vng_nsectors,
			    vnd->sc_geom.vng_ntracks,
			    vnd->sc_geom.vng_ncylinders);
#endif

		/* Attach the disk. */
		disk_attach(&vnd->sc_dkdev);

		/* Initialize the xfer and buffer pools. */
		pool_init(&vnd->sc_vxpool, sizeof(struct vndxfer), 0,
		    0, 0, "vndxpl", NULL, IPL_BIO);

		/* Try and read the disklabel. */
		vndgetdisklabel(dev, vnd);

		vndunlock(vnd);

		break;

close_and_exit:
		(void) vn_close(nd.ni_vp, fflags, l->l_cred);
unlock_and_exit:
#ifdef VND_COMPRESSION
		/* free any allocated memory (for compressed file) */
		if (vnd->sc_comp_offsets) {
			free(vnd->sc_comp_offsets, M_DEVBUF);
			vnd->sc_comp_offsets = NULL;
		}
		if (vnd->sc_comp_buff) {
			free(vnd->sc_comp_buff, M_DEVBUF);
			vnd->sc_comp_buff = NULL;
		}
		if (vnd->sc_comp_decombuf) {
			free(vnd->sc_comp_decombuf, M_DEVBUF);
			vnd->sc_comp_decombuf = NULL;
		}
#endif /* VND_COMPRESSION */
		vndunlock(vnd);
		return (error);

	case VNDIOCCLR:
		if ((error = vndlock(vnd)) != 0)
			return (error);

		/*
		 * Don't unconfigure if any other partitions are open
		 * or if both the character and block flavors of this
		 * partition are open.
		 */
		part = DISKPART(dev);
		pmask = (1 << part);
		if (((vnd->sc_dkdev.dk_openmask & ~pmask) ||
		    ((vnd->sc_dkdev.dk_bopenmask & pmask) &&
		    (vnd->sc_dkdev.dk_copenmask & pmask))) &&
		    !(vio->vnd_flags & VNDIOF_FORCE)) {
			vndunlock(vnd);
			return (EBUSY);
		}

		/*
		 * XXX vndclear() might call vndclose() implicitly;
		 * release lock to avoid recursion
		 */
		vndunlock(vnd);
		vndclear(vnd, minor(dev));
#ifdef DEBUG
		if (vnddebug & VDB_INIT)
			printf("vndioctl: CLRed\n");
#endif

		/* Destroy the xfer and buffer pools. */
		pool_destroy(&vnd->sc_vxpool);

		/* Detach the disk. */
		disk_detach(&vnd->sc_dkdev);
		break;

#ifdef COMPAT_30
	case VNDIOOCGET: {
		struct vnd_ouser *vnu;
		struct vattr va;
		vnu = (struct vnd_ouser *)data;
		KASSERT(l);
		switch (error = vnd_cget(l, unit, &vnu->vnu_unit, &va)) {
		case 0:
			vnu->vnu_dev = va.va_fsid;
			vnu->vnu_ino = va.va_fileid;
			break;
		case -1:
			/* unused is not an error */
			vnu->vnu_dev = 0;
			vnu->vnu_ino = 0;
			break;
		default:
			return error;
		}
		break;
	}
#endif
	case VNDIOCGET: {
		struct vnd_user *vnu;
		struct vattr va;
		vnu = (struct vnd_user *)data;
		KASSERT(l);
		switch (error = vnd_cget(l, unit, &vnu->vnu_unit, &va)) {
		case 0:
			vnu->vnu_dev = va.va_fsid;
			vnu->vnu_ino = va.va_fileid;
			break;
		case -1:
			/* unused is not an error */
			vnu->vnu_dev = 0;
			vnu->vnu_ino = 0;
			break;
		default:
			return error;
		}
		break;
	}

	case DIOCGDINFO:
		*(struct disklabel *)data = *(vnd->sc_dkdev.dk_label);
		break;

#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDINFO:
		newlabel = *(vnd->sc_dkdev.dk_label);
		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
			return ENOTTY;
		memcpy(data, &newlabel, sizeof (struct olddisklabel));
		break;
#endif

	case DIOCGPART:
		((struct partinfo *)data)->disklab = vnd->sc_dkdev.dk_label;
		((struct partinfo *)data)->part =
		    &vnd->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
		break;

	case DIOCWDINFO:
	case DIOCSDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCWDINFO:
	case ODIOCSDINFO:
#endif
	{
		struct disklabel *lp;

		if ((error = vndlock(vnd)) != 0)
			return (error);

		vnd->sc_flags |= VNF_LABELLING;

#ifdef __HAVE_OLD_DISKLABEL
		if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
			memset(&newlabel, 0, sizeof newlabel);
			memcpy(&newlabel, data, sizeof (struct olddisklabel));
			lp = &newlabel;
		} else
#endif
		lp = (struct disklabel *)data;

		error = setdisklabel(vnd->sc_dkdev.dk_label,
		    lp, 0, vnd->sc_dkdev.dk_cpulabel);
		if (error == 0) {
			if (cmd == DIOCWDINFO
#ifdef __HAVE_OLD_DISKLABEL
			    || cmd == ODIOCWDINFO
#endif
			   )
				error = writedisklabel(VNDLABELDEV(dev),
				    vndstrategy, vnd->sc_dkdev.dk_label,
				    vnd->sc_dkdev.dk_cpulabel);
		}

		vnd->sc_flags &= ~VNF_LABELLING;

		vndunlock(vnd);

		if (error)
			return (error);
		break;
	}

	case DIOCKLABEL:
		if (*(int *)data != 0)
			vnd->sc_flags |= VNF_KLABEL;
		else
			vnd->sc_flags &= ~VNF_KLABEL;
		break;

	case DIOCWLABEL:
		if (*(int *)data != 0)
			vnd->sc_flags |= VNF_WLABEL;
		else
			vnd->sc_flags &= ~VNF_WLABEL;
		break;

	case DIOCGDEFLABEL:
		vndgetdefaultlabel(vnd, (struct disklabel *)data);
		break;

#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDEFLABEL:
		vndgetdefaultlabel(vnd, &newlabel);
		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
			return ENOTTY;
		memcpy(data, &newlabel, sizeof (struct olddisklabel));
		break;
#endif

	default:
		return (ENOTTY);
	}

	return (0);
}

/*
 * Duplicate the current process's credentials.  Since we are called only
 * as the result of a SET ioctl and only root can do that, any future access
 * to this "disk" is essentially as root.  Note that credentials may change
 * if some other uid can write directly to the mapped file (NFS).
 */
static int
vndsetcred(struct vnd_softc *vnd, kauth_cred_t cred)
{
	struct uio auio;
	struct iovec aiov;
	char *tmpbuf;
	int error;

	vnd->sc_cred = kauth_cred_dup(cred);
	tmpbuf = malloc(DEV_BSIZE, M_TEMP, M_WAITOK);

	/* XXX: Horrible kludge to establish credentials for NFS */
	aiov.iov_base = tmpbuf;
	aiov.iov_len = min(DEV_BSIZE, dbtob(vnd->sc_size));
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = 0;
	auio.uio_rw = UIO_READ;
	auio.uio_resid = aiov.iov_len;
	UIO_SETUP_SYSSPACE(&auio);
	vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY);
	error = VOP_READ(vnd->sc_vp, &auio, 0, vnd->sc_cred);
	if (error == 0) {
		/*
		 * Because vnd does all IO directly through the vnode
		 * we need to flush (at least) the buffer from the above
		 * VOP_READ from the buffer cache to prevent cache
		 * incoherencies.  Also, be careful to write dirty
		 * buffers back to stable storage.
		 */
		error = vinvalbuf(vnd->sc_vp, V_SAVE, vnd->sc_cred,
		    curlwp, 0, 0);
	}
	VOP_UNLOCK(vnd->sc_vp, 0);

	free(tmpbuf, M_TEMP);
	return (error);
}

/*
 * Set maxactive based on FS type
 */
static void
vndthrottle(struct vnd_softc *vnd, struct vnode *vp)
{
#ifdef NFS
	extern int (**nfsv2_vnodeop_p)(void *);

	if (vp->v_op == nfsv2_vnodeop_p)
		vnd->sc_maxactive = 2;
	else
#endif
		vnd->sc_maxactive = 8;

	if (vnd->sc_maxactive < 1)
		vnd->sc_maxactive = 1;
}

#if 0
static void
vndshutdown(void)
{
	struct vnd_softc *vnd;

	for (vnd = &vnd_softc[0]; vnd < &vnd_softc[numvnd]; vnd++)
		if (vnd->sc_flags & VNF_INITED)
			vndclear(vnd);
}
#endif

static void
vndclear(struct vnd_softc *vnd, int myminor)
{
	struct vnode *vp = vnd->sc_vp;
	int fflags = FREAD;
	int bmaj, cmaj, i, mn;
	int s;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndclear(%p): vp %p\n", vnd, vp);
#endif
	/* locate the major number */
	bmaj = bdevsw_lookup_major(&vnd_bdevsw);
	cmaj = cdevsw_lookup_major(&vnd_cdevsw);

	/* Nuke the vnodes for any open instances */
	for (i = 0; i < MAXPARTITIONS; i++) {
		mn = DISKMINOR(device_unit(vnd->sc_dev), i);
		vdevgone(bmaj, mn, mn, VBLK);
		if (mn != myminor) /* XXX avoid killing own vnode */
			vdevgone(cmaj, mn, mn, VCHR);
	}

	if ((vnd->sc_flags & VNF_READONLY) == 0)
		fflags |= FWRITE;

	s = splbio();
	bufq_drain(vnd->sc_tab);
	splx(s);

	vnd->sc_flags |= VNF_VUNCONF;
	wakeup(&vnd->sc_tab);
	while (vnd->sc_flags & VNF_KTHREAD)
		tsleep(&vnd->sc_kthread, PRIBIO, "vnthr", 0);

#ifdef VND_COMPRESSION
	/* free the compressed file buffers */
	if (vnd->sc_flags & VNF_COMP) {
		if (vnd->sc_comp_offsets) {
			free(vnd->sc_comp_offsets, M_DEVBUF);
			vnd->sc_comp_offsets = NULL;
		}
		if (vnd->sc_comp_buff) {
			free(vnd->sc_comp_buff, M_DEVBUF);
			vnd->sc_comp_buff = NULL;
		}
		if (vnd->sc_comp_decombuf) {
			free(vnd->sc_comp_decombuf, M_DEVBUF);
			vnd->sc_comp_decombuf = NULL;
		}
	}
#endif /* VND_COMPRESSION */
	vnd->sc_flags &=
	    ~(VNF_INITED | VNF_READONLY | VNF_VLABEL
	    | VNF_VUNCONF | VNF_COMP);
	if (vp == (struct vnode *)0)
		panic("vndclear: null vp");
	(void) vn_close(vp, fflags, vnd->sc_cred);
	kauth_cred_free(vnd->sc_cred);
	vnd->sc_vp = (struct vnode *)0;
	vnd->sc_cred = (kauth_cred_t)0;
	vnd->sc_size = 0;
}

static int
vndsize(dev_t dev)
{
	struct vnd_softc *sc;
	struct disklabel *lp;
	int part, unit, omask;
	int size;

	unit = vndunit(dev);
	sc = device_lookup_private(&vnd_cd, unit);
	if (sc == NULL)
		return -1;

	if ((sc->sc_flags & VNF_INITED) == 0)
		return (-1);

	part = DISKPART(dev);
	omask = sc->sc_dkdev.dk_openmask & (1 << part);
	lp = sc->sc_dkdev.dk_label;

	if (omask == 0 && vndopen(dev, 0, S_IFBLK, curlwp))	/* XXX */
		return (-1);

	if (lp->d_partitions[part].p_fstype != FS_SWAP)
		size = -1;
	else
		size = lp->d_partitions[part].p_size *
		    (lp->d_secsize / DEV_BSIZE);

	if (omask == 0 && vndclose(dev, 0, S_IFBLK, curlwp))	/* XXX */
		return (-1);

	return (size);
}

static int
vnddump(dev_t dev, daddr_t blkno, void *va,
    size_t size)
{

	/* Not implemented. */
	return ENXIO;
}

static void
vndgetdefaultlabel(struct vnd_softc *sc, struct disklabel *lp)
{
	struct vndgeom *vng = &sc->sc_geom;
	struct partition *pp;

	memset(lp, 0, sizeof(*lp));

	lp->d_secperunit = sc->sc_size / (vng->vng_secsize / DEV_BSIZE);
	lp->d_secsize = vng->vng_secsize;
	lp->d_nsectors = vng->vng_nsectors;
	lp->d_ntracks = vng->vng_ntracks;
	lp->d_ncylinders = vng->vng_ncylinders;
	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;

	strncpy(lp->d_typename, "vnd", sizeof(lp->d_typename));
	lp->d_type = DTYPE_VND;
	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
	lp->d_rpm = 3600;
	lp->d_interleave = 1;
	lp->d_flags = 0;

	pp = &lp->d_partitions[RAW_PART];
	pp->p_offset = 0;
	pp->p_size = lp->d_secperunit;
	pp->p_fstype = FS_UNUSED;
	lp->d_npartitions = RAW_PART + 1;

	lp->d_magic = DISKMAGIC;
	lp->d_magic2 = DISKMAGIC;
	lp->d_checksum = dkcksum(lp);
}

/*
 * Read the disklabel from a vnd.  If one is not present, create a fake one.
 */
static void
vndgetdisklabel(dev_t dev, struct vnd_softc *sc)
{
	const char *errstring;
	struct disklabel *lp = sc->sc_dkdev.dk_label;
	struct cpu_disklabel *clp = sc->sc_dkdev.dk_cpulabel;
	int i;

	memset(clp, 0, sizeof(*clp));

	vndgetdefaultlabel(sc, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(VNDLABELDEV(dev), vndstrategy, lp, clp);
	if (errstring) {
		/*
		 * Lack of disklabel is common, but we print the warning
		 * anyway, since it might contain other useful information.
		 */
		aprint_normal_dev(sc->sc_dev, "%s\n", errstring);

		/*
		 * For historical reasons, if there's no disklabel
		 * present, all partitions must be FS_BSDFFS and
		 * occupy the entire disk.
		 */
		for (i = 0; i < MAXPARTITIONS; i++) {
			/*
			 * Don't wipe out port specific hack (such as
			 * dos partition hack of i386 port).
			 */
			if (lp->d_partitions[i].p_size != 0)
				continue;

			lp->d_partitions[i].p_size = lp->d_secperunit;
			lp->d_partitions[i].p_offset = 0;
			lp->d_partitions[i].p_fstype = FS_BSDFFS;
		}

		strncpy(lp->d_packname, "default label",
		    sizeof(lp->d_packname));

		lp->d_npartitions = MAXPARTITIONS;
		lp->d_checksum = dkcksum(lp);
	}

	/* In-core label now valid. */
	sc->sc_flags |= VNF_VLABEL;
}

/*
 * Wait interruptibly for an exclusive lock.
 *
 * XXX
 * Several drivers do this; it should be abstracted and made MP-safe.
 */
static int
vndlock(struct vnd_softc *sc)
{
	int error;

	while ((sc->sc_flags & VNF_LOCKED) != 0) {
		sc->sc_flags |= VNF_WANTED;
		if ((error = tsleep(sc, PRIBIO | PCATCH, "vndlck", 0)) != 0)
			return (error);
	}
	sc->sc_flags |= VNF_LOCKED;
	return (0);
}

/*
 * Unlock and wake up any waiters.
 */
static void
vndunlock(struct vnd_softc *sc)
{

	sc->sc_flags &= ~VNF_LOCKED;
	if ((sc->sc_flags & VNF_WANTED) != 0) {
		sc->sc_flags &= ~VNF_WANTED;
		wakeup(sc);
	}
}

#ifdef VND_COMPRESSION
/* compressed file read */
static void
compstrategy(struct buf *bp, off_t bn)
{
	int error;
	int unit = vndunit(bp->b_dev);
	struct vnd_softc *vnd =
	    device_lookup_private(&vnd_cd, unit);
	u_int32_t comp_block;
	struct uio auio;
	char *addr;
	int s;

	/* set up constants for data move */
	auio.uio_rw = UIO_READ;
	UIO_SETUP_SYSSPACE(&auio);

	/* read, and transfer the data */
	addr = bp->b_data;
	bp->b_resid = bp->b_bcount;
	s = splbio();
	while (bp->b_resid > 0) {
		unsigned length;
		size_t length_in_buffer;
		u_int32_t offset_in_buffer;
		struct iovec aiov;

		/* calculate the compressed block number */
		comp_block = bn / (off_t)vnd->sc_comp_blksz;

		/* check for good block number */
		if (comp_block >= vnd->sc_comp_numoffs) {
			bp->b_error = EINVAL;
			splx(s);
			return;
		}

		/* read in the compressed block, if not in buffer */
		if (comp_block != vnd->sc_comp_buffblk) {
			length = vnd->sc_comp_offsets[comp_block + 1] -
			    vnd->sc_comp_offsets[comp_block];
			vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY);
			error = vn_rdwr(UIO_READ, vnd->sc_vp, vnd->sc_comp_buff,
			    length, vnd->sc_comp_offsets[comp_block],
			    UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, vnd->sc_cred,
			    NULL, NULL);
			if (error) {
				bp->b_error = error;
				VOP_UNLOCK(vnd->sc_vp, 0);
				splx(s);
				return;
			}
			/* uncompress the buffer */
			vnd->sc_comp_stream.next_in = vnd->sc_comp_buff;
			vnd->sc_comp_stream.avail_in = length;
			vnd->sc_comp_stream.next_out = vnd->sc_comp_decombuf;
			vnd->sc_comp_stream.avail_out = vnd->sc_comp_blksz;
			inflateReset(&vnd->sc_comp_stream);
			error = inflate(&vnd->sc_comp_stream, Z_FINISH);
			if (error != Z_STREAM_END) {
				if (vnd->sc_comp_stream.msg)
					aprint_normal_dev(vnd->sc_dev,
					    "compressed file, %s\n",
					    vnd->sc_comp_stream.msg);
				bp->b_error = EBADMSG;
				VOP_UNLOCK(vnd->sc_vp, 0);
				splx(s);
				return;
			}
			vnd->sc_comp_buffblk = comp_block;
			VOP_UNLOCK(vnd->sc_vp, 0);
		}

		/* transfer the usable uncompressed data */
		offset_in_buffer = bn % (off_t)vnd->sc_comp_blksz;
		length_in_buffer = vnd->sc_comp_blksz - offset_in_buffer;
		if (length_in_buffer > bp->b_resid)
			length_in_buffer = bp->b_resid;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		aiov.iov_base = addr;
		aiov.iov_len = length_in_buffer;
		auio.uio_resid = aiov.iov_len;
		auio.uio_offset = 0;
		error = uiomove(vnd->sc_comp_decombuf + offset_in_buffer,
		    length_in_buffer, &auio);
		if (error) {
			bp->b_error = error;
			splx(s);
			return;
		}

		bn += length_in_buffer;
		addr += length_in_buffer;
		bp->b_resid -= length_in_buffer;
	}
	splx(s);
}

/* compression memory allocation routines */
static void *
vnd_alloc(void *aux, u_int items, u_int siz)
{
	return malloc(items * siz, M_TEMP, M_NOWAIT);
}

static void
vnd_free(void *aux, void *ptr)
{
	free(ptr, M_TEMP);
}
#endif /* VND_COMPRESSION */

static void
vnd_set_properties(struct vnd_softc *vnd)
{
	prop_dictionary_t disk_info, odisk_info, geom;

	disk_info = prop_dictionary_create();

	geom = prop_dictionary_create();

	prop_dictionary_set_uint64(geom, "sectors-per-unit",
	    vnd->sc_geom.vng_nsectors * vnd->sc_geom.vng_ntracks *
	    vnd->sc_geom.vng_ncylinders);

	prop_dictionary_set_uint32(geom, "sector-size",
	    vnd->sc_geom.vng_secsize);

	prop_dictionary_set_uint16(geom, "sectors-per-track",
	    vnd->sc_geom.vng_nsectors);

	prop_dictionary_set_uint16(geom, "tracks-per-cylinder",
	    vnd->sc_geom.vng_ntracks);

	prop_dictionary_set_uint64(geom, "cylinders-per-unit",
	    vnd->sc_geom.vng_ncylinders);

	prop_dictionary_set(disk_info, "geometry", geom);
	prop_object_release(geom);

	prop_dictionary_set(device_properties(vnd->sc_dev),
	    "disk-info", disk_info);

	/*
	 * Don't release disk_info here; we keep a reference to it.
	 * disk_detach() will release it when we go away.
	 */

	odisk_info = vnd->sc_dkdev.dk_info;
	vnd->sc_dkdev.dk_info = disk_info;
	if (odisk_info)
		prop_object_release(odisk_info);
}