/*	$NetBSD: vnd.c,v 1.155 2006/10/14 01:49:20 dogcow Exp $	*/

/*-
 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vn.c 1.13 94/04/02$
 *
 *	@(#)vn.c	8.9 (Berkeley) 5/14/95
 */

/*
 * Copyright (c) 1988 University of Utah.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vn.c 1.13 94/04/02$
 *
 *	@(#)vn.c	8.9 (Berkeley) 5/14/95
 */

/*
 * Vnode disk driver.
 *
 * Block/character interface to a vnode.  Allows one to treat a file
 * as a disk (e.g. build a filesystem in it, mount it, etc.).
 *
 * NOTE 1: This uses the VOP_BMAP/VOP_STRATEGY interface to the vnode
 * instead of a simple VOP_RDWR.  We do this to avoid distorting the
 * local buffer cache.
 *
 * NOTE 2: There is a security issue involved with this driver.
 * Once mounted, all access to the contents of the "mapped" file via
 * the special file is controlled by the permissions on the special
 * file; the protection of the mapped file itself is ignored (effectively,
 * root credentials are used in all transactions).
 *
 * NOTE 3: Doesn't interact with leases, should it?
 */
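
/*
 * Typical life cycle, as implemented below: a unit is configured with the
 * VNDIOCSET ioctl (vndioctl), which opens the backing file, establishes
 * credentials and geometry and starts a per-unit kernel thread (vndthread);
 * I/O arrives through vndstrategy, is queued on sc_tab and carried out by
 * the thread via VOP_BMAP/VOP_STRATEGY; VNDIOCCLR tears the unit down
 * again (vndclear).
 *
 * A minimal userland sketch of the configuration step (illustrative only;
 * error handling omitted, device path and image name are hypothetical,
 * struct vnd_ioctl comes from <dev/vndvar.h>):
 *
 *	struct vnd_ioctl vio;
 *	int fd = open("/dev/rvnd0d", O_RDWR);
 *	memset(&vio, 0, sizeof(vio));
 *	vio.vnd_file = "/tmp/diskimage";
 *	vio.vnd_flags = 0;			// or VNDIOF_READONLY
 *	ioctl(fd, VNDIOCSET, &vio);
 *	...
 *	ioctl(fd, VNDIOCCLR, &vio);		// unconfigure when done
 */
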
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vnd.c,v 1.155 2006/10/14 01:49:20 dogcow Exp $");

#if defined(_KERNEL_OPT)
#include "fs_nfs.h"
#include "opt_vnd.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/proc.h>
#include <sys/kthread.h>
#include <sys/errno.h>
#include <sys/buf.h>
#include <sys/bufq.h>
#include <sys/malloc.h>
#include <sys/ioctl.h>
#include <sys/disklabel.h>
#include <sys/device.h>
#include <sys/disk.h>
#include <sys/stat.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/uio.h>
#include <sys/conf.h>
#include <sys/kauth.h>

#include <net/zlib.h>

#include <miscfs/specfs/specdev.h>

#include <dev/vndvar.h>

#if defined(VNDDEBUG) && !defined(DEBUG)
#define	DEBUG
#endif

#ifdef DEBUG
int dovndcluster = 1;
#define	VDB_FOLLOW	0x01
#define	VDB_INIT	0x02
#define	VDB_IO		0x04
#define	VDB_LABEL	0x08
int vnddebug = 0x00;
#endif

#define	vndunit(x)	DISKUNIT(x)

struct vndxfer {
	struct buf vx_buf;
	struct vnd_softc *vx_vnd;
};
#define	VND_BUFTOXFER(bp)	((struct vndxfer *)(void *)bp)

#define	VND_GETXFER(vnd)	pool_get(&(vnd)->sc_vxpool, PR_WAITOK)
#define	VND_PUTXFER(vnd, vx)	pool_put(&(vnd)->sc_vxpool, (vx))

#define	VNDLABELDEV(dev) \
	(MAKEDISKDEV(major((dev)), vndunit((dev)), RAW_PART))

/* called by main() at boot time (XXX: and the LKM driver) */
void	vndattach(int);

static void	vndclear(struct vnd_softc *, int);
static int	vndsetcred(struct vnd_softc *, kauth_cred_t);
static void	vndthrottle(struct vnd_softc *, struct vnode *);
static void	vndiodone(struct buf *);
#if 0
static void	vndshutdown(void);
#endif

static void	vndgetdefaultlabel(struct vnd_softc *, struct disklabel *);
static void	vndgetdisklabel(dev_t, struct vnd_softc *);

static int	vndlock(struct vnd_softc *);
static void	vndunlock(struct vnd_softc *);
#ifdef VND_COMPRESSION
static void	compstrategy(struct buf *, off_t);
static void	*vnd_alloc(void *, u_int, u_int);
static void	vnd_free(void *, void *);
#endif /* VND_COMPRESSION */

static void	vndthread(void *);

static dev_type_open(vndopen);
static dev_type_close(vndclose);
static dev_type_read(vndread);
static dev_type_write(vndwrite);
static dev_type_ioctl(vndioctl);
static dev_type_strategy(vndstrategy);
static dev_type_dump(vnddump);
static dev_type_size(vndsize);

const struct bdevsw vnd_bdevsw = {
	vndopen, vndclose, vndstrategy, vndioctl, vnddump, vndsize, D_DISK
};

const struct cdevsw vnd_cdevsw = {
	vndopen, vndclose, vndread, vndwrite, vndioctl,
	nostop, notty, nopoll, nommap, nokqfilter, D_DISK
};

static int	vnd_match(struct device *, struct cfdata *, void *);
static void	vnd_attach(struct device *, struct device *, void *);
static int	vnd_detach(struct device *, int);

CFATTACH_DECL(vnd, sizeof(struct vnd_softc),
    vnd_match, vnd_attach, vnd_detach, NULL);
extern struct cfdriver vnd_cd;

static struct vnd_softc	*vnd_spawn(int);
int	vnd_destroy(struct device *);

void
vndattach(int num __unused)
{
	int error;

	error = config_cfattach_attach(vnd_cd.cd_name, &vnd_ca);
	if (error)
		aprint_error("%s: unable to register cfattach\n",
		    vnd_cd.cd_name);
}
static int
vnd_match(struct device *self __unused, struct cfdata *cfdata __unused,
    void *aux __unused)
{
	return 1;
}

static void
vnd_attach(struct device *parent __unused, struct device *self,
    void *aux __unused)
{
	struct vnd_softc *sc = (struct vnd_softc *)self;

	sc->sc_comp_offsets = NULL;
	sc->sc_comp_buff = NULL;
	sc->sc_comp_decombuf = NULL;
	bufq_alloc(&sc->sc_tab, "disksort", BUFQ_SORT_RAWBLOCK);
	pseudo_disk_init(&sc->sc_dkdev);
}

static int
vnd_detach(struct device *self, int flags __unused)
{
	struct vnd_softc *sc = (struct vnd_softc *)self;

	if (sc->sc_flags & VNF_INITED)
		return EBUSY;

	bufq_free(sc->sc_tab);

	return 0;
}

static struct vnd_softc *
vnd_spawn(int unit)
{
	struct cfdata *cf;

	cf = malloc(sizeof(*cf), M_DEVBUF, M_WAITOK);
	cf->cf_name = vnd_cd.cd_name;
	cf->cf_atname = vnd_cd.cd_name;
	cf->cf_unit = unit;
	cf->cf_fstate = FSTATE_STAR;

	return (struct vnd_softc *)config_attach_pseudo(cf);
}

int
vnd_destroy(struct device *dev)
{
	int error;
	struct cfdata *cf;

	cf = device_cfdata(dev);
	error = config_detach(dev, DETACH_QUIET);
	if (error)
		return error;
	free(cf, M_DEVBUF);
	return 0;
}

static int
vndopen(dev_t dev, int flags __unused, int mode, struct lwp *l __unused)
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;
	int error = 0, part, pmask;
	struct disklabel *lp;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndopen(0x%x, 0x%x, 0x%x, %p)\n", dev, flags, mode, l);
#endif
	sc = device_lookup(&vnd_cd, unit);
	if (sc == NULL) {
		sc = vnd_spawn(unit);
		if (sc == NULL)
			return ENOMEM;
	}

	if ((error = vndlock(sc)) != 0)
		return (error);

	lp = sc->sc_dkdev.dk_label;

	part = DISKPART(dev);
	pmask = (1 << part);

	/*
	 * If we're initialized, check to see if there are any other
	 * open partitions.  If not, then it's safe to update the
	 * in-core disklabel.  Only read the disklabel if it is
	 * not already valid.
	 */
	if ((sc->sc_flags & (VNF_INITED|VNF_VLABEL)) == VNF_INITED &&
	    sc->sc_dkdev.dk_openmask == 0)
		vndgetdisklabel(dev, sc);

	/* Check that the partition exists. */
	if (part != RAW_PART) {
		if (((sc->sc_flags & VNF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
		     (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			goto done;
		}
	}

	/* Prevent our unit from being unconfigured while open. */
	switch (mode) {
	case S_IFCHR:
		sc->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		sc->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}
	sc->sc_dkdev.dk_openmask =
	    sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask;

 done:
	vndunlock(sc);
	return (error);
}
static int
vndclose(dev_t dev, int flags __unused, int mode, struct lwp *l __unused)
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;
	int error = 0, part;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndclose(0x%x, 0x%x, 0x%x, %p)\n", dev, flags, mode, l);
#endif
	sc = device_lookup(&vnd_cd, unit);
	if (sc == NULL)
		return ENXIO;

	if ((error = vndlock(sc)) != 0)
		return (error);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (mode) {
	case S_IFCHR:
		sc->sc_dkdev.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		sc->sc_dkdev.dk_bopenmask &= ~(1 << part);
		break;
	}
	sc->sc_dkdev.dk_openmask =
	    sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask;

	vndunlock(sc);

	if ((sc->sc_flags & VNF_INITED) == 0) {
		if ((error = vnd_destroy((struct device *)sc)) != 0) {
			aprint_error("%s: unable to detach instance\n",
			    sc->sc_dev.dv_xname);
			return error;
		}
	}

	return (0);
}

/*
 * Queue the request, and wakeup the kernel thread to handle it.
 */
static void
vndstrategy(struct buf *bp)
{
	int unit = vndunit(bp->b_dev);
	struct vnd_softc *vnd =
	    (struct vnd_softc *)device_lookup(&vnd_cd, unit);
	struct disklabel *lp = vnd->sc_dkdev.dk_label;
	daddr_t blkno;
	int s = splbio();

	bp->b_resid = bp->b_bcount;

	if ((vnd->sc_flags & VNF_INITED) == 0) {
		bp->b_error = ENXIO;
		bp->b_flags |= B_ERROR;
		goto done;
	}

	/*
	 * The transfer must be a whole number of blocks.
	 */
	if ((bp->b_bcount % lp->d_secsize) != 0) {
		bp->b_error = EINVAL;
		bp->b_flags |= B_ERROR;
		goto done;
	}

	/*
	 * check if we're read-only.
	 */
	if ((vnd->sc_flags & VNF_READONLY) && !(bp->b_flags & B_READ)) {
		bp->b_error = EACCES;
		bp->b_flags |= B_ERROR;
		goto done;
	}

	/*
	 * Do bounds checking and adjust the transfer.  If there's an error,
	 * the bounds check will flag that for us.
	 */
	if (DISKPART(bp->b_dev) == RAW_PART) {
		if (bounds_check_with_mediasize(bp, DEV_BSIZE,
		    vnd->sc_size) <= 0)
			goto done;
	} else {
		if (bounds_check_with_label(&vnd->sc_dkdev,
		    bp, vnd->sc_flags & (VNF_WLABEL|VNF_LABELLING)) <= 0)
			goto done;
	}

	/* If it's a nil transfer, wake up the top half now. */
	if (bp->b_bcount == 0)
		goto done;

	/*
	 * Put the block number in terms of the logical blocksize
	 * of the "device".
	 */
	blkno = bp->b_blkno / (lp->d_secsize / DEV_BSIZE);

	/*
	 * Translate the partition-relative block number to an absolute one.
	 */
	if (DISKPART(bp->b_dev) != RAW_PART) {
		struct partition *pp;

		pp = &vnd->sc_dkdev.dk_label->d_partitions[
		    DISKPART(bp->b_dev)];
		blkno += pp->p_offset;
	}
	bp->b_rawblkno = blkno;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndstrategy(%p): unit %d\n", bp, unit);
#endif
	BUFQ_PUT(vnd->sc_tab, bp);
	wakeup(&vnd->sc_tab);
	splx(s);
	return;

 done:
	biodone(bp);
	splx(s);
}
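
/*
 * vndstrategy() above only validates and queues a request; the actual file
 * I/O is carried out by the per-unit kernel thread below.  The block number
 * arithmetic works in two steps: vndstrategy() converts b_blkno (in
 * DEV_BSIZE units) to a sector number -- e.g. with a 2048-byte sector and
 * DEV_BSIZE of 512, b_blkno 8 becomes sector 8 / (2048 / 512) = 2 -- and
 * adds the partition offset; vndthread() then turns b_rawblkno back into a
 * byte offset within the backing file (b_rawblkno * d_secsize).
 */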
static void
vndthread(void *arg)
{
	struct vnd_softc *vnd = arg;
	struct mount *mp;
	int s, bsize;
	int sz, error;
	struct disklabel *lp;

	s = splbio();
	vnd->sc_flags |= VNF_KTHREAD;
	wakeup(&vnd->sc_kthread);

	/*
	 * Dequeue requests, break them into bsize pieces and submit using
	 * VOP_BMAP/VOP_STRATEGY.
	 */
	while ((vnd->sc_flags & VNF_VUNCONF) == 0) {
		struct vndxfer *vnx;
		off_t offset;
		int resid;
		int skipped = 0;
		off_t bn;
		int flags;
		struct buf *obp;
		struct buf *bp;

		obp = BUFQ_GET(vnd->sc_tab);
		if (obp == NULL) {
			tsleep(&vnd->sc_tab, PRIBIO, "vndbp", 0);
			continue;
		}
		splx(s);
		flags = obp->b_flags;
#ifdef DEBUG
		if (vnddebug & VDB_FOLLOW)
			printf("vndthread(%p)\n", obp);
#endif
		lp = vnd->sc_dkdev.dk_label;

		/* convert to a byte offset within the file. */
		bn = obp->b_rawblkno * lp->d_secsize;

		if (vnd->sc_vp->v_mount == NULL) {
			obp->b_error = ENXIO;
			obp->b_flags |= B_ERROR;
			goto done;
		}
#ifdef VND_COMPRESSION
		/* handle a compressed read */
		if ((flags & B_READ) != 0 && (vnd->sc_flags & VNF_COMP)) {
			compstrategy(obp, bn);
			goto done;
		}
#endif /* VND_COMPRESSION */

		bsize = vnd->sc_vp->v_mount->mnt_stat.f_iosize;

		/*
		 * Allocate a header for this transfer and link it to the
		 * buffer.
		 */
		s = splbio();
		vnx = VND_GETXFER(vnd);
		splx(s);
		vnx->vx_vnd = vnd;

		bp = &vnx->vx_buf;
		BUF_INIT(bp);
		bp->b_flags = (obp->b_flags & B_READ) | B_CALL;
		bp->b_iodone = vndiodone;
		bp->b_private = obp;
		bp->b_vp = vnd->sc_vp;
		bp->b_data = obp->b_data;
		bp->b_bcount = bp->b_resid = obp->b_bcount;
		BIO_COPYPRIO(bp, obp);

		s = splbio();
		while (vnd->sc_active >= vnd->sc_maxactive) {
			tsleep(&vnd->sc_tab, PRIBIO, "vndac", 0);
		}
		vnd->sc_active++;
		splx(s);

		if ((flags & B_READ) == 0) {
			s = splbio();
			V_INCR_NUMOUTPUT(bp->b_vp);
			splx(s);

			vn_start_write(vnd->sc_vp, &mp, V_WAIT);
		}

		/* Instrumentation. */
		disk_busy(&vnd->sc_dkdev);

		/*
		 * Feed requests sequentially.
		 * We do it this way to keep from flooding NFS servers if we
		 * are connected to an NFS file.  This places the burden on
		 * the client rather than the server.
		 */
		error = 0;
		for (offset = 0, resid = bp->b_resid; resid;
		    resid -= sz, offset += sz) {
			struct buf *nbp;
			struct vnode *vp;
			daddr_t nbn;
			int off, nra;

			nra = 0;
			vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY | LK_CANRECURSE);
			error = VOP_BMAP(vnd->sc_vp, bn / bsize, &vp, &nbn, &nra);
			VOP_UNLOCK(vnd->sc_vp, 0);

			if (error == 0 && (long)nbn == -1)
				error = EIO;

			/*
			 * If there was an error or a hole in the file...punt.
			 * Note that we may have to wait for any operations
			 * that we have already fired off before releasing
			 * the buffer.
			 *
			 * XXX we could deal with holes here but it would be
			 * a hassle (in the write case).
			 */
			if (error) {
				skipped += resid;
				break;
			}

#ifdef DEBUG
			if (!dovndcluster)
				nra = 0;
#endif

			off = bn % bsize;
			sz = MIN(((off_t)1 + nra) * bsize - off, resid);
#ifdef DEBUG
			if (vnddebug & VDB_IO)
				printf("vndstrategy: vp %p/%p bn 0x%qx/0x%" PRIx64
				    " sz 0x%x\n",
				    vnd->sc_vp, vp, (long long)bn, nbn, sz);
#endif

			nbp = getiobuf();
			nestiobuf_setup(bp, nbp, offset, sz);
			nbp->b_blkno = nbn + btodb(off);

#if 0 /* XXX #ifdef DEBUG */
			if (vnddebug & VDB_IO)
				printf("vndstart(%ld): bp %p vp %p blkno "
				    "0x%" PRIx64 " flags %x addr %p cnt 0x%x\n",
				    (long) (vnd-vnd_softc), &nbp->vb_buf,
				    nbp->vb_buf.b_vp, nbp->vb_buf.b_blkno,
				    nbp->vb_buf.b_flags, nbp->vb_buf.b_data,
				    nbp->vb_buf.b_bcount);
#endif
			VOP_STRATEGY(vp, nbp);
			bn += sz;
		}
		nestiobuf_done(bp, skipped, error);

		if ((flags & B_READ) == 0)
			vn_finished_write(mp, 0);

		s = splbio();
		continue;

 done:
		biodone(obp);
		s = splbio();
	}

	vnd->sc_flags &= ~(VNF_KTHREAD | VNF_VUNCONF);
	wakeup(&vnd->sc_kthread);
	splx(s);
	kthread_exit(0);
}

static void
vndiodone(struct buf *bp)
{
	struct vndxfer *vnx = VND_BUFTOXFER(bp);
	struct vnd_softc *vnd = vnx->vx_vnd;
	struct buf *obp = bp->b_private;

	KASSERT(&vnx->vx_buf == bp);
	KASSERT(vnd->sc_active > 0);
#ifdef DEBUG
	if (vnddebug & VDB_IO) {
		printf("vndiodone1: bp %p iodone: error %d\n",
		    bp, (bp->b_flags & B_ERROR) != 0 ? bp->b_error : 0);
	}
#endif
	disk_unbusy(&vnd->sc_dkdev, bp->b_bcount - bp->b_resid,
	    (bp->b_flags & B_READ));
	vnd->sc_active--;
	if (vnd->sc_active == 0) {
		wakeup(&vnd->sc_tab);
	}
	obp->b_flags |= bp->b_flags & B_ERROR;
	obp->b_error = bp->b_error;
	obp->b_resid = bp->b_resid;
	VND_PUTXFER(vnd, vnx);
	biodone(obp);
}

/* ARGSUSED */
static int
vndread(dev_t dev, struct uio *uio, int flags __unused)
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndread(0x%x, %p)\n", dev, uio);
#endif

	sc = device_lookup(&vnd_cd, unit);
	if (sc == NULL)
		return ENXIO;

	if ((sc->sc_flags & VNF_INITED) == 0)
		return (ENXIO);

	return (physio(vndstrategy, NULL, dev, B_READ, minphys, uio));
}

/* ARGSUSED */
static int
vndwrite(dev_t dev, struct uio *uio, int flags __unused)
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndwrite(0x%x, %p)\n", dev, uio);
#endif

	sc = device_lookup(&vnd_cd, unit);
	if (sc == NULL)
		return ENXIO;

	if ((sc->sc_flags & VNF_INITED) == 0)
		return (ENXIO);

	return (physio(vndstrategy, NULL, dev, B_WRITE, minphys, uio));
}
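
/*
 * Common helper for the VNDIOCGET/VNDIOOCGET ioctls: look up the requested
 * unit (or the caller's own unit when *un is -1) and fetch the attributes
 * of the backing vnode.  Returns 0 on success, -1 for a valid but
 * unconfigured unit (not treated as an error by the callers), or an errno.
 */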
static int
vnd_cget(struct lwp *l, int unit, int *un, struct vattr *va)
{
	struct vnd_softc *vnd;

	if (*un == -1)
		*un = unit;
	if (*un < 0)
		return EINVAL;

	vnd = device_lookup(&vnd_cd, *un);
	if (vnd == NULL)
		return (*un >= vnd_cd.cd_ndevs) ? ENXIO : -1;

	if ((vnd->sc_flags & VNF_INITED) == 0)
		return -1;

	return VOP_GETATTR(vnd->sc_vp, va, l->l_cred, l);
}

/* ARGSUSED */
static int
vndioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct lwp *l)
{
	int unit = vndunit(dev);
	struct vnd_softc *vnd;
	struct vnd_ioctl *vio;
	struct vattr vattr;
	struct nameidata nd;
	int error, part, pmask;
	size_t geomsize;
	int fflags;
#ifdef __HAVE_OLD_DISKLABEL
	struct disklabel newlabel;
#endif

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndioctl(0x%x, 0x%lx, %p, 0x%x, %p): unit %d\n",
		    dev, cmd, data, flag, l->l_proc, unit);
#endif
	vnd = device_lookup(&vnd_cd, unit);
	if (vnd == NULL &&
#ifdef COMPAT_30
	    cmd != VNDIOOCGET &&
#endif
	    cmd != VNDIOCGET)
		return ENXIO;
	vio = (struct vnd_ioctl *)data;

	/* Must be open for writes for these commands... */
	switch (cmd) {
	case VNDIOCSET:
	case VNDIOCCLR:
	case DIOCSDINFO:
	case DIOCWDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCSDINFO:
	case ODIOCWDINFO:
#endif
	case DIOCKLABEL:
	case DIOCWLABEL:
		if ((flag & FWRITE) == 0)
			return (EBADF);
	}

	/* Must be initialized for these... */
	switch (cmd) {
	case VNDIOCCLR:
	case DIOCGDINFO:
	case DIOCSDINFO:
	case DIOCWDINFO:
	case DIOCGPART:
	case DIOCKLABEL:
	case DIOCWLABEL:
	case DIOCGDEFLABEL:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDINFO:
	case ODIOCSDINFO:
	case ODIOCWDINFO:
	case ODIOCGDEFLABEL:
#endif
		if ((vnd->sc_flags & VNF_INITED) == 0)
			return (ENXIO);
	}

	switch (cmd) {
	case VNDIOCSET:
		if (vnd->sc_flags & VNF_INITED)
			return (EBUSY);

		if ((error = vndlock(vnd)) != 0)
			return (error);

		fflags = FREAD;
		if ((vio->vnd_flags & VNDIOF_READONLY) == 0)
			fflags |= FWRITE;
		NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, vio->vnd_file, l);
		if ((error = vn_open(&nd, fflags, 0)) != 0)
			goto unlock_and_exit;
		KASSERT(l);
		error = VOP_GETATTR(nd.ni_vp, &vattr, l->l_cred, l);
		if (!error && nd.ni_vp->v_type != VREG)
			error = EOPNOTSUPP;
		if (error) {
			VOP_UNLOCK(nd.ni_vp, 0);
			goto close_and_exit;
		}

		/* If using a compressed file, initialize its info */
		/* (or abort with an error if kernel has no compression) */
		if (vio->vnd_flags & VNF_COMP) {
#ifdef VND_COMPRESSION
			struct vnd_comp_header *ch;
			int i;
			u_int32_t comp_size;
			u_int32_t comp_maxsize;

			/* allocate space for compressed file header */
			ch = malloc(sizeof(struct vnd_comp_header),
			    M_TEMP, M_WAITOK);

			/* read compressed file header */
			error = vn_rdwr(UIO_READ, nd.ni_vp, (caddr_t)ch,
			    sizeof(struct vnd_comp_header), 0, UIO_SYSSPACE,
			    IO_UNIT|IO_NODELOCKED, l->l_cred, NULL, NULL);
			if (error) {
				free(ch, M_TEMP);
				VOP_UNLOCK(nd.ni_vp, 0);
				goto close_and_exit;
			}

			/* save some header info */
			vnd->sc_comp_blksz = ntohl(ch->block_size);
			/* note last offset is the file byte size */
			vnd->sc_comp_numoffs = ntohl(ch->num_blocks) + 1;
			free(ch, M_TEMP);
			if (vnd->sc_comp_blksz == 0 ||
			    vnd->sc_comp_blksz % DEV_BSIZE != 0) {
				VOP_UNLOCK(nd.ni_vp, 0);
				error = EINVAL;
				goto close_and_exit;
			}
			if (sizeof(struct vnd_comp_header) +
			    sizeof(u_int64_t) * vnd->sc_comp_numoffs >
			    vattr.va_size) {
				VOP_UNLOCK(nd.ni_vp, 0);
				error = EINVAL;
				goto close_and_exit;
			}
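
			/*
			 * Layout of a compressed image, as consumed here and
			 * by compstrategy() below: a vnd_comp_header
			 * (block_size and num_blocks, both big-endian),
			 * followed by num_blocks + 1 big-endian 64-bit file
			 * offsets -- entry i is where compressed block i
			 * starts, the final entry is the file size --
			 * followed by the zlib-compressed blocks themselves.
			 */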
			/* set decompressed file size */
			vattr.va_size =
			    ((u_quad_t)vnd->sc_comp_numoffs - 1) *
			    (u_quad_t)vnd->sc_comp_blksz;

			/* allocate space for all the compressed offsets */
			vnd->sc_comp_offsets =
			    malloc(sizeof(u_int64_t) * vnd->sc_comp_numoffs,
			    M_DEVBUF, M_WAITOK);

			/* read in the offsets */
			error = vn_rdwr(UIO_READ, nd.ni_vp,
			    (caddr_t)vnd->sc_comp_offsets,
			    sizeof(u_int64_t) * vnd->sc_comp_numoffs,
			    sizeof(struct vnd_comp_header), UIO_SYSSPACE,
			    IO_UNIT|IO_NODELOCKED, l->l_cred, NULL, NULL);
			if (error) {
				VOP_UNLOCK(nd.ni_vp, 0);
				goto close_and_exit;
			}
			/*
			 * find largest block size (used for allocation limit).
			 * Also convert offset to native byte order.
			 */
			comp_maxsize = 0;
			for (i = 0; i < vnd->sc_comp_numoffs - 1; i++) {
				vnd->sc_comp_offsets[i] =
				    be64toh(vnd->sc_comp_offsets[i]);
				comp_size = be64toh(vnd->sc_comp_offsets[i + 1])
				    - vnd->sc_comp_offsets[i];
				if (comp_size > comp_maxsize)
					comp_maxsize = comp_size;
			}
			vnd->sc_comp_offsets[vnd->sc_comp_numoffs - 1] =
			    be64toh(vnd->sc_comp_offsets[vnd->sc_comp_numoffs - 1]);

			/* create compressed data buffer */
			vnd->sc_comp_buff = malloc(comp_maxsize,
			    M_DEVBUF, M_WAITOK);

			/* create decompressed buffer */
			vnd->sc_comp_decombuf = malloc(vnd->sc_comp_blksz,
			    M_DEVBUF, M_WAITOK);
			vnd->sc_comp_buffblk = -1;

			/* Initialize decompress stream */
			bzero(&vnd->sc_comp_stream, sizeof(z_stream));
			vnd->sc_comp_stream.zalloc = vnd_alloc;
			vnd->sc_comp_stream.zfree = vnd_free;
			error = inflateInit2(&vnd->sc_comp_stream, MAX_WBITS);
			if (error) {
				if (vnd->sc_comp_stream.msg)
					printf("vnd%d: compressed file, %s\n",
					    unit, vnd->sc_comp_stream.msg);
				VOP_UNLOCK(nd.ni_vp, 0);
				error = EINVAL;
				goto close_and_exit;
			}

			vnd->sc_flags |= VNF_COMP | VNF_READONLY;
#else /* !VND_COMPRESSION */
			VOP_UNLOCK(nd.ni_vp, 0);
			error = EOPNOTSUPP;
			goto close_and_exit;
#endif /* VND_COMPRESSION */
		}

		VOP_UNLOCK(nd.ni_vp, 0);
		vnd->sc_vp = nd.ni_vp;
		vnd->sc_size = btodb(vattr.va_size);	/* note truncation */

		/*
		 * Use pseudo-geometry specified.  If none was provided,
		 * use "standard" Adaptec fictitious geometry.
		 */
		if (vio->vnd_flags & VNDIOF_HASGEOM) {

			memcpy(&vnd->sc_geom, &vio->vnd_geom,
			    sizeof(vio->vnd_geom));

			/*
			 * Sanity-check the sector size.
			 * XXX Don't allow secsize < DEV_BSIZE.  Should
			 * XXX we?
			 */
			if (vnd->sc_geom.vng_secsize < DEV_BSIZE ||
			    (vnd->sc_geom.vng_secsize % DEV_BSIZE) != 0 ||
			    vnd->sc_geom.vng_ncylinders == 0 ||
			    (vnd->sc_geom.vng_ntracks *
			     vnd->sc_geom.vng_nsectors) == 0) {
				error = EINVAL;
				goto close_and_exit;
			}

			/*
			 * Compute the size (in DEV_BSIZE blocks) specified
			 * by the geometry.
			 */
			geomsize = (vnd->sc_geom.vng_nsectors *
			    vnd->sc_geom.vng_ntracks *
			    vnd->sc_geom.vng_ncylinders) *
			    (vnd->sc_geom.vng_secsize / DEV_BSIZE);

			/*
			 * Sanity-check the size against the specified
			 * geometry.
			 */
			if (vnd->sc_size < geomsize) {
				error = EINVAL;
				goto close_and_exit;
			}
		} else if (vnd->sc_size >= (32 * 64)) {
			/*
			 * Size must be at least 2048 DEV_BSIZE blocks
			 * (1M) in order to use this geometry.
			 */
			vnd->sc_geom.vng_secsize = DEV_BSIZE;
			vnd->sc_geom.vng_nsectors = 32;
			vnd->sc_geom.vng_ntracks = 64;
			vnd->sc_geom.vng_ncylinders = vnd->sc_size / (64 * 32);
		} else {
			vnd->sc_geom.vng_secsize = DEV_BSIZE;
			vnd->sc_geom.vng_nsectors = 1;
			vnd->sc_geom.vng_ntracks = 1;
			vnd->sc_geom.vng_ncylinders = vnd->sc_size;
		}
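
		/*
		 * At this point sc_geom describes a fictitious geometry no
		 * larger than sc_size (in DEV_BSIZE blocks).  For example, a
		 * 10 MB image with no explicit geometry yields sc_size =
		 * 20480 and the default 32 sector / 64 track layout, i.e.
		 * 20480 / (32 * 64) = 10 cylinders.
		 */
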
		if (vio->vnd_flags & VNDIOF_READONLY) {
			vnd->sc_flags |= VNF_READONLY;
		}

		if ((error = vndsetcred(vnd, l->l_cred)) != 0)
			goto close_and_exit;

		vndthrottle(vnd, vnd->sc_vp);
		vio->vnd_size = dbtob(vnd->sc_size);
		vnd->sc_flags |= VNF_INITED;

		/* create the kernel thread, wait for it to be up */
		error = kthread_create1(vndthread, vnd, &vnd->sc_kthread,
		    vnd->sc_dev.dv_xname);
		if (error)
			goto close_and_exit;
		while ((vnd->sc_flags & VNF_KTHREAD) == 0) {
			tsleep(&vnd->sc_kthread, PRIBIO, "vndthr", 0);
		}
#ifdef DEBUG
		if (vnddebug & VDB_INIT)
			printf("vndioctl: SET vp %p size 0x%lx %d/%d/%d/%d\n",
			    vnd->sc_vp, (unsigned long) vnd->sc_size,
			    vnd->sc_geom.vng_secsize,
			    vnd->sc_geom.vng_nsectors,
			    vnd->sc_geom.vng_ntracks,
			    vnd->sc_geom.vng_ncylinders);
#endif

		/* Attach the disk. */
		vnd->sc_dkdev.dk_name = vnd->sc_dev.dv_xname;
		pseudo_disk_attach(&vnd->sc_dkdev);

		/* Initialize the xfer and buffer pools. */
		pool_init(&vnd->sc_vxpool, sizeof(struct vndxfer), 0,
		    0, 0, "vndxpl", NULL);

		/* Try and read the disklabel. */
		vndgetdisklabel(dev, vnd);

		vndunlock(vnd);

		break;

 close_and_exit:
		(void) vn_close(nd.ni_vp, fflags, l->l_cred, l);
 unlock_and_exit:
#ifdef VND_COMPRESSION
		/* free any allocated memory (for compressed file) */
		if (vnd->sc_comp_offsets) {
			free(vnd->sc_comp_offsets, M_DEVBUF);
			vnd->sc_comp_offsets = NULL;
		}
		if (vnd->sc_comp_buff) {
			free(vnd->sc_comp_buff, M_DEVBUF);
			vnd->sc_comp_buff = NULL;
		}
		if (vnd->sc_comp_decombuf) {
			free(vnd->sc_comp_decombuf, M_DEVBUF);
			vnd->sc_comp_decombuf = NULL;
		}
#endif /* VND_COMPRESSION */
		vndunlock(vnd);
		return (error);

	case VNDIOCCLR:
		if ((error = vndlock(vnd)) != 0)
			return (error);

		/*
		 * Don't unconfigure if any other partitions are open
		 * or if both the character and block flavors of this
		 * partition are open.
		 */
		part = DISKPART(dev);
		pmask = (1 << part);
		if (((vnd->sc_dkdev.dk_openmask & ~pmask) ||
		    ((vnd->sc_dkdev.dk_bopenmask & pmask) &&
		    (vnd->sc_dkdev.dk_copenmask & pmask))) &&
		    !(vio->vnd_flags & VNDIOF_FORCE)) {
			vndunlock(vnd);
			return (EBUSY);
		}

		/*
		 * XXX vndclear() might call vndclose() implicitly;
		 * release the lock to avoid recursion
		 */
		vndunlock(vnd);
		vndclear(vnd, minor(dev));
#ifdef DEBUG
		if (vnddebug & VDB_INIT)
			printf("vndioctl: CLRed\n");
#endif

		/* Destroy the xfer and buffer pools. */
		pool_destroy(&vnd->sc_vxpool);

		/* Detach the disk. */
		pseudo_disk_detach(&vnd->sc_dkdev);
		break;

#ifdef COMPAT_30
	case VNDIOOCGET: {
		struct vnd_ouser *vnu;
		struct vattr va;

		vnu = (struct vnd_ouser *)data;
		KASSERT(l);
		switch (error = vnd_cget(l, unit, &vnu->vnu_unit, &va)) {
		case 0:
			vnu->vnu_dev = va.va_fsid;
			vnu->vnu_ino = va.va_fileid;
			break;
		case -1:
			/* unused is not an error */
			vnu->vnu_dev = 0;
			vnu->vnu_ino = 0;
			break;
		default:
			return error;
		}
		break;
	}
#endif
	case VNDIOCGET: {
		struct vnd_user *vnu;
		struct vattr va;

		vnu = (struct vnd_user *)data;
		KASSERT(l);
		switch (error = vnd_cget(l, unit, &vnu->vnu_unit, &va)) {
		case 0:
			vnu->vnu_dev = va.va_fsid;
			vnu->vnu_ino = va.va_fileid;
			break;
		case -1:
			/* unused is not an error */
			vnu->vnu_dev = 0;
			vnu->vnu_ino = 0;
			break;
		default:
			return error;
		}
		break;
	}

	case DIOCGDINFO:
		*(struct disklabel *)data = *(vnd->sc_dkdev.dk_label);
		break;

#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDINFO:
		newlabel = *(vnd->sc_dkdev.dk_label);
		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
			return ENOTTY;
		memcpy(data, &newlabel, sizeof (struct olddisklabel));
		break;
#endif

	case DIOCGPART:
		((struct partinfo *)data)->disklab = vnd->sc_dkdev.dk_label;
		((struct partinfo *)data)->part =
		    &vnd->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
		break;

	case DIOCWDINFO:
	case DIOCSDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCWDINFO:
	case ODIOCSDINFO:
#endif
	{
		struct disklabel *lp;

		if ((error = vndlock(vnd)) != 0)
			return (error);

		vnd->sc_flags |= VNF_LABELLING;

#ifdef __HAVE_OLD_DISKLABEL
		if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
			memset(&newlabel, 0, sizeof newlabel);
			memcpy(&newlabel, data, sizeof (struct olddisklabel));
			lp = &newlabel;
		} else
#endif
		lp = (struct disklabel *)data;

		error = setdisklabel(vnd->sc_dkdev.dk_label,
		    lp, 0, vnd->sc_dkdev.dk_cpulabel);
		if (error == 0) {
			if (cmd == DIOCWDINFO
#ifdef __HAVE_OLD_DISKLABEL
			    || cmd == ODIOCWDINFO
#endif
			   )
				error = writedisklabel(VNDLABELDEV(dev),
				    vndstrategy, vnd->sc_dkdev.dk_label,
				    vnd->sc_dkdev.dk_cpulabel);
		}

		vnd->sc_flags &= ~VNF_LABELLING;

		vndunlock(vnd);

		if (error)
			return (error);
		break;
	}

	case DIOCKLABEL:
		if (*(int *)data != 0)
			vnd->sc_flags |= VNF_KLABEL;
		else
			vnd->sc_flags &= ~VNF_KLABEL;
		break;

	case DIOCWLABEL:
		if (*(int *)data != 0)
			vnd->sc_flags |= VNF_WLABEL;
		else
			vnd->sc_flags &= ~VNF_WLABEL;
		break;

	case DIOCGDEFLABEL:
		vndgetdefaultlabel(vnd, (struct disklabel *)data);
		break;

#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDEFLABEL:
		vndgetdefaultlabel(vnd, &newlabel);
		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
			return ENOTTY;
		memcpy(data, &newlabel, sizeof (struct olddisklabel));
		break;
#endif

	default:
		return (ENOTTY);
	}

	return (0);
}
/*
 * Duplicate the current process's credentials.  Since we are called only
 * as the result of a SET ioctl and only root can do that, any future access
 * to this "disk" is essentially as root.  Note that credentials may change
 * if some other uid can write directly to the mapped file (NFS).
 */
static int
vndsetcred(struct vnd_softc *vnd, kauth_cred_t cred)
{
	struct uio auio;
	struct iovec aiov;
	char *tmpbuf;
	int error;

	vnd->sc_cred = kauth_cred_dup(cred);
	tmpbuf = malloc(DEV_BSIZE, M_TEMP, M_WAITOK);

	/* XXX: Horrible kludge to establish credentials for NFS */
	aiov.iov_base = tmpbuf;
	aiov.iov_len = min(DEV_BSIZE, dbtob(vnd->sc_size));
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = 0;
	auio.uio_rw = UIO_READ;
	auio.uio_resid = aiov.iov_len;
	UIO_SETUP_SYSSPACE(&auio);
	vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY);
	error = VOP_READ(vnd->sc_vp, &auio, 0, vnd->sc_cred);
	if (error == 0) {
		/*
		 * Because vnd does all IO directly through the vnode
		 * we need to flush (at least) the buffer from the above
		 * VOP_READ from the buffer cache to prevent cache
		 * incoherencies.  Also, be careful to write dirty
		 * buffers back to stable storage.
		 */
		error = vinvalbuf(vnd->sc_vp, V_SAVE, vnd->sc_cred,
		    curlwp, 0, 0);
	}
	VOP_UNLOCK(vnd->sc_vp, 0);

	free(tmpbuf, M_TEMP);
	return (error);
}

/*
 * Set maxactive based on FS type
 */
static void
vndthrottle(struct vnd_softc *vnd, struct vnode *vp)
{
#ifdef NFS
	extern int (**nfsv2_vnodeop_p)(void *);

	if (vp->v_op == nfsv2_vnodeop_p)
		vnd->sc_maxactive = 2;
	else
#else
	do { if (&vp) {} } while (/* CONSTCOND */ 0);
#endif
	vnd->sc_maxactive = 8;

	if (vnd->sc_maxactive < 1)
		vnd->sc_maxactive = 1;
}

#if 0
static void
vndshutdown(void)
{
	struct vnd_softc *vnd;

	for (vnd = &vnd_softc[0]; vnd < &vnd_softc[numvnd]; vnd++)
		if (vnd->sc_flags & VNF_INITED)
			vndclear(vnd);
}
#endif

static void
vndclear(struct vnd_softc *vnd, int myminor)
{
	struct vnode *vp = vnd->sc_vp;
	struct lwp *l = curlwp;
	int fflags = FREAD;
	int bmaj, cmaj, i, mn;
	int s;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndclear(%p): vp %p\n", vnd, vp);
#endif
	/* locate the major number */
	bmaj = bdevsw_lookup_major(&vnd_bdevsw);
	cmaj = cdevsw_lookup_major(&vnd_cdevsw);

	/* Nuke the vnodes for any open instances */
	for (i = 0; i < MAXPARTITIONS; i++) {
		mn = DISKMINOR(device_unit(&vnd->sc_dev), i);
		vdevgone(bmaj, mn, mn, VBLK);
		if (mn != myminor) /* XXX avoid to kill own vnode */
			vdevgone(cmaj, mn, mn, VCHR);
	}

	if ((vnd->sc_flags & VNF_READONLY) == 0)
		fflags |= FWRITE;

	s = splbio();
	bufq_drain(vnd->sc_tab);
	splx(s);

	vnd->sc_flags |= VNF_VUNCONF;
	wakeup(&vnd->sc_tab);
	while (vnd->sc_flags & VNF_KTHREAD)
		tsleep(&vnd->sc_kthread, PRIBIO, "vnthr", 0);

#ifdef VND_COMPRESSION
	/* free the compressed file buffers */
	if (vnd->sc_flags & VNF_COMP) {
		if (vnd->sc_comp_offsets) {
			free(vnd->sc_comp_offsets, M_DEVBUF);
			vnd->sc_comp_offsets = NULL;
		}
		if (vnd->sc_comp_buff) {
			free(vnd->sc_comp_buff, M_DEVBUF);
			vnd->sc_comp_buff = NULL;
		}
		if (vnd->sc_comp_decombuf) {
			free(vnd->sc_comp_decombuf, M_DEVBUF);
			vnd->sc_comp_decombuf = NULL;
		}
	}
#endif /* VND_COMPRESSION */
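	/*
	 * The kernel thread has exited and the queue is drained; mark the
	 * unit unconfigured and release the backing vnode and credentials.
	 */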
	vnd->sc_flags &=
	    ~(VNF_INITED | VNF_READONLY | VNF_VLABEL
	      | VNF_VUNCONF | VNF_COMP);
	if (vp == (struct vnode *)0)
		panic("vndclear: null vp");
	(void) vn_close(vp, fflags, vnd->sc_cred, l);
	kauth_cred_free(vnd->sc_cred);
	vnd->sc_vp = (struct vnode *)0;
	vnd->sc_cred = (kauth_cred_t)0;
	vnd->sc_size = 0;
}

static int
vndsize(dev_t dev)
{
	struct vnd_softc *sc;
	struct disklabel *lp;
	int part, unit, omask;
	int size;

	unit = vndunit(dev);
	sc = (struct vnd_softc *)device_lookup(&vnd_cd, unit);
	if (sc == NULL)
		return -1;

	if ((sc->sc_flags & VNF_INITED) == 0)
		return (-1);

	part = DISKPART(dev);
	omask = sc->sc_dkdev.dk_openmask & (1 << part);
	lp = sc->sc_dkdev.dk_label;

	if (omask == 0 && vndopen(dev, 0, S_IFBLK, curlwp))	/* XXX */
		return (-1);

	if (lp->d_partitions[part].p_fstype != FS_SWAP)
		size = -1;
	else
		size = lp->d_partitions[part].p_size *
		    (lp->d_secsize / DEV_BSIZE);

	if (omask == 0 && vndclose(dev, 0, S_IFBLK, curlwp))	/* XXX */
		return (-1);

	return (size);
}

static int
vnddump(dev_t dev __unused, daddr_t blkno __unused, caddr_t va __unused,
    size_t size __unused)
{

	/* Not implemented. */
	return ENXIO;
}

static void
vndgetdefaultlabel(struct vnd_softc *sc, struct disklabel *lp)
{
	struct vndgeom *vng = &sc->sc_geom;
	struct partition *pp;

	memset(lp, 0, sizeof(*lp));

	lp->d_secperunit = sc->sc_size / (vng->vng_secsize / DEV_BSIZE);
	lp->d_secsize = vng->vng_secsize;
	lp->d_nsectors = vng->vng_nsectors;
	lp->d_ntracks = vng->vng_ntracks;
	lp->d_ncylinders = vng->vng_ncylinders;
	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;

	strncpy(lp->d_typename, "vnd", sizeof(lp->d_typename));
	lp->d_type = DTYPE_VND;
	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
	lp->d_rpm = 3600;
	lp->d_interleave = 1;
	lp->d_flags = 0;

	pp = &lp->d_partitions[RAW_PART];
	pp->p_offset = 0;
	pp->p_size = lp->d_secperunit;
	pp->p_fstype = FS_UNUSED;
	lp->d_npartitions = RAW_PART + 1;

	lp->d_magic = DISKMAGIC;
	lp->d_magic2 = DISKMAGIC;
	lp->d_checksum = dkcksum(lp);
}

/*
 * Read the disklabel from a vnd.  If one is not present, create a fake one.
 */
static void
vndgetdisklabel(dev_t dev, struct vnd_softc *sc)
{
	const char *errstring;
	struct disklabel *lp = sc->sc_dkdev.dk_label;
	struct cpu_disklabel *clp = sc->sc_dkdev.dk_cpulabel;
	int i;

	memset(clp, 0, sizeof(*clp));

	vndgetdefaultlabel(sc, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(VNDLABELDEV(dev), vndstrategy, lp, clp);
	if (errstring) {
		/*
		 * Lack of disklabel is common, but we print the warning
		 * anyway, since it might contain other useful information.
		 */
		printf("%s: %s\n", sc->sc_dev.dv_xname, errstring);

		/*
		 * For historical reasons, if there's no disklabel
		 * present, all partitions must be FS_BSDFFS and
		 * occupy the entire disk.
		 */
		for (i = 0; i < MAXPARTITIONS; i++) {
			/*
			 * Don't wipe out port specific hack (such as
			 * dos partition hack of i386 port).
			 */
			if (lp->d_partitions[i].p_size != 0)
				continue;

			lp->d_partitions[i].p_size = lp->d_secperunit;
			lp->d_partitions[i].p_offset = 0;
			lp->d_partitions[i].p_fstype = FS_BSDFFS;
		}

		strncpy(lp->d_packname, "default label",
		    sizeof(lp->d_packname));

		lp->d_npartitions = MAXPARTITIONS;
		lp->d_checksum = dkcksum(lp);
	}

	/* In-core label now valid. */
	sc->sc_flags |= VNF_VLABEL;
}

/*
 * Wait interruptibly for an exclusive lock.
 *
 * XXX
 * Several drivers do this; it should be abstracted and made MP-safe.
 */
static int
vndlock(struct vnd_softc *sc)
{
	int error;

	while ((sc->sc_flags & VNF_LOCKED) != 0) {
		sc->sc_flags |= VNF_WANTED;
		if ((error = tsleep(sc, PRIBIO | PCATCH, "vndlck", 0)) != 0)
			return (error);
	}
	sc->sc_flags |= VNF_LOCKED;
	return (0);
}

/*
 * Unlock and wake up any waiters.
 */
static void
vndunlock(struct vnd_softc *sc)
{

	sc->sc_flags &= ~VNF_LOCKED;
	if ((sc->sc_flags & VNF_WANTED) != 0) {
		sc->sc_flags &= ~VNF_WANTED;
		wakeup(sc);
	}
}

#ifdef VND_COMPRESSION
/* compressed file read */
static void
compstrategy(struct buf *bp, off_t bn)
{
	int error;
	int unit = vndunit(bp->b_dev);
	struct vnd_softc *vnd =
	    (struct vnd_softc *)device_lookup(&vnd_cd, unit);
	u_int32_t comp_block;
	struct uio auio;
	caddr_t addr;
	int s;

	/* set up constants for data move */
	auio.uio_rw = UIO_READ;
	UIO_SETUP_SYSSPACE(&auio);

	/* read, and transfer the data */
	addr = bp->b_data;
	s = splbio();
	while (bp->b_resid > 0) {
		unsigned length;
		size_t length_in_buffer;
		u_int32_t offset_in_buffer;
		struct iovec aiov;

		/* calculate the compressed block number */
		comp_block = bn / (off_t)vnd->sc_comp_blksz;

		/* check for good block number */
		if (comp_block >= vnd->sc_comp_numoffs) {
			bp->b_error = EINVAL;
			bp->b_flags |= B_ERROR;
			splx(s);
			return;
		}

		/* read in the compressed block, if not in buffer */
		if (comp_block != vnd->sc_comp_buffblk) {
			length = vnd->sc_comp_offsets[comp_block + 1] -
			    vnd->sc_comp_offsets[comp_block];
			vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY);
			error = vn_rdwr(UIO_READ, vnd->sc_vp, vnd->sc_comp_buff,
			    length, vnd->sc_comp_offsets[comp_block],
			    UIO_SYSSPACE, IO_UNIT, vnd->sc_cred, NULL, NULL);
			if (error) {
				bp->b_error = error;
				bp->b_flags |= B_ERROR;
				VOP_UNLOCK(vnd->sc_vp, 0);
				splx(s);
				return;
			}
			/* uncompress the buffer */
			vnd->sc_comp_stream.next_in = vnd->sc_comp_buff;
			vnd->sc_comp_stream.avail_in = length;
			vnd->sc_comp_stream.next_out = vnd->sc_comp_decombuf;
			vnd->sc_comp_stream.avail_out = vnd->sc_comp_blksz;
			inflateReset(&vnd->sc_comp_stream);
			error = inflate(&vnd->sc_comp_stream, Z_FINISH);
			if (error != Z_STREAM_END) {
				if (vnd->sc_comp_stream.msg)
					printf("%s: compressed file, %s\n",
					    vnd->sc_dev.dv_xname,
					    vnd->sc_comp_stream.msg);
				bp->b_error = EBADMSG;
				bp->b_flags |= B_ERROR;
				VOP_UNLOCK(vnd->sc_vp, 0);
				splx(s);
				return;
			}
			vnd->sc_comp_buffblk = comp_block;
			VOP_UNLOCK(vnd->sc_vp, 0);
		}

		/* transfer the usable uncompressed data */
		offset_in_buffer = bn % (off_t)vnd->sc_comp_blksz;
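		/*
		 * Copy out at most the remainder of this decompressed
		 * block, and no more than the request still wants.
		 */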
		length_in_buffer = vnd->sc_comp_blksz - offset_in_buffer;
		if (length_in_buffer > bp->b_resid)
			length_in_buffer = bp->b_resid;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		aiov.iov_base = addr;
		aiov.iov_len = length_in_buffer;
		auio.uio_resid = aiov.iov_len;
		auio.uio_offset = 0;
		error = uiomove(vnd->sc_comp_decombuf + offset_in_buffer,
		    length_in_buffer, &auio);
		if (error) {
			bp->b_error = error;
			bp->b_flags |= B_ERROR;
			splx(s);
			return;
		}

		bn += length_in_buffer;
		addr += length_in_buffer;
		bp->b_resid -= length_in_buffer;
	}
	splx(s);
}

/* compression memory allocation routines */
static void *
vnd_alloc(void *aux __unused, u_int items, u_int siz)
{
	return malloc(items * siz, M_TEMP, M_NOWAIT);
}

static void
vnd_free(void *aux __unused, void *ptr)
{
	free(ptr, M_TEMP);
}
#endif /* VND_COMPRESSION */