1 /* $NetBSD: vnd.c,v 1.147 2006/05/14 21:42:26 elad Exp $ */ 2 3 /*- 4 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the NetBSD 21 * Foundation, Inc. and its contributors. 22 * 4. Neither the name of The NetBSD Foundation nor the names of its 23 * contributors may be used to endorse or promote products derived 24 * from this software without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 29 * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 36 * POSSIBILITY OF SUCH DAMAGE. 37 */ 38 39 /* 40 * Copyright (c) 1990, 1993 41 * The Regents of the University of California. All rights reserved. 42 * 43 * This code is derived from software contributed to Berkeley by 44 * the Systems Programming Group of the University of Utah Computer 45 * Science Department. 46 * 47 * Redistribution and use in source and binary forms, with or without 48 * modification, are permitted provided that the following conditions 49 * are met: 50 * 1. Redistributions of source code must retain the above copyright 51 * notice, this list of conditions and the following disclaimer. 52 * 2. Redistributions in binary form must reproduce the above copyright 53 * notice, this list of conditions and the following disclaimer in the 54 * documentation and/or other materials provided with the distribution. 55 * 3. Neither the name of the University nor the names of its contributors 56 * may be used to endorse or promote products derived from this software 57 * without specific prior written permission. 58 * 59 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 60 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 61 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 62 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 63 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 64 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 65 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 66 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 67 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 68 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 69 * SUCH DAMAGE. 70 * 71 * from: Utah $Hdr: vn.c 1.13 94/04/02$ 72 * 73 * @(#)vn.c 8.9 (Berkeley) 5/14/95 74 */ 75 76 /* 77 * Copyright (c) 1988 University of Utah. 78 * 79 * This code is derived from software contributed to Berkeley by 80 * the Systems Programming Group of the University of Utah Computer 81 * Science Department. 82 * 83 * Redistribution and use in source and binary forms, with or without 84 * modification, are permitted provided that the following conditions 85 * are met: 86 * 1. Redistributions of source code must retain the above copyright 87 * notice, this list of conditions and the following disclaimer. 88 * 2. Redistributions in binary form must reproduce the above copyright 89 * notice, this list of conditions and the following disclaimer in the 90 * documentation and/or other materials provided with the distribution. 91 * 3. All advertising materials mentioning features or use of this software 92 * must display the following acknowledgement: 93 * This product includes software developed by the University of 94 * California, Berkeley and its contributors. 95 * 4. Neither the name of the University nor the names of its contributors 96 * may be used to endorse or promote products derived from this software 97 * without specific prior written permission. 
98 * 99 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 100 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 101 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 102 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 103 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 104 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 105 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 106 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 107 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 108 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 109 * SUCH DAMAGE. 110 * 111 * from: Utah $Hdr: vn.c 1.13 94/04/02$ 112 * 113 * @(#)vn.c 8.9 (Berkeley) 5/14/95 114 */ 115 116 /* 117 * Vnode disk driver. 118 * 119 * Block/character interface to a vnode. Allows one to treat a file 120 * as a disk (e.g. build a filesystem in it, mount it, etc.). 121 * 122 * NOTE 1: This uses the VOP_BMAP/VOP_STRATEGY interface to the vnode 123 * instead of a simple VOP_RDWR. We do this to avoid distorting the 124 * local buffer cache. 125 * 126 * NOTE 2: There is a security issue involved with this driver. 127 * Once mounted all access to the contents of the "mapped" file via 128 * the special file is controlled by the permissions on the special 129 * file, the protection of the mapped file is ignored (effectively, 130 * by using root credentials in all transactions). 131 * 132 * NOTE 3: Doesn't interact with leases, should it? 
133 */ 134 135 #include <sys/cdefs.h> 136 __KERNEL_RCSID(0, "$NetBSD: vnd.c,v 1.147 2006/05/14 21:42:26 elad Exp $"); 137 138 #if defined(_KERNEL_OPT) 139 #include "fs_nfs.h" 140 #include "opt_vnd.h" 141 #endif 142 143 #include <sys/param.h> 144 #include <sys/systm.h> 145 #include <sys/namei.h> 146 #include <sys/proc.h> 147 #include <sys/kthread.h> 148 #include <sys/errno.h> 149 #include <sys/buf.h> 150 #include <sys/bufq.h> 151 #include <sys/malloc.h> 152 #include <sys/ioctl.h> 153 #include <sys/disklabel.h> 154 #include <sys/device.h> 155 #include <sys/disk.h> 156 #include <sys/stat.h> 157 #include <sys/mount.h> 158 #include <sys/vnode.h> 159 #include <sys/file.h> 160 #include <sys/uio.h> 161 #include <sys/conf.h> 162 #include <sys/kauth.h> 163 164 #include <net/zlib.h> 165 166 #include <miscfs/specfs/specdev.h> 167 168 #include <dev/vndvar.h> 169 170 #if defined(VNDDEBUG) && !defined(DEBUG) 171 #define DEBUG 172 #endif 173 174 #ifdef DEBUG 175 int dovndcluster = 1; 176 #define VDB_FOLLOW 0x01 177 #define VDB_INIT 0x02 178 #define VDB_IO 0x04 179 #define VDB_LABEL 0x08 180 int vnddebug = 0x00; 181 #endif 182 183 #define vndunit(x) DISKUNIT(x) 184 185 struct vndxfer { 186 struct buf vx_buf; 187 struct vnd_softc *vx_vnd; 188 }; 189 #define VND_BUFTOXFER(bp) ((struct vndxfer *)(void *)bp) 190 191 #define VND_GETXFER(vnd) pool_get(&(vnd)->sc_vxpool, PR_WAITOK) 192 #define VND_PUTXFER(vnd, vx) pool_put(&(vnd)->sc_vxpool, (vx)) 193 194 #define VNDLABELDEV(dev) \ 195 (MAKEDISKDEV(major((dev)), vndunit((dev)), RAW_PART)) 196 197 /* called by main() at boot time (XXX: and the LKM driver) */ 198 void vndattach(int); 199 200 static void vndclear(struct vnd_softc *, int); 201 static int vndsetcred(struct vnd_softc *, kauth_cred_t); 202 static void vndthrottle(struct vnd_softc *, struct vnode *); 203 static void vndiodone(struct buf *); 204 #if 0 205 static void vndshutdown(void); 206 #endif 207 208 static void vndgetdefaultlabel(struct vnd_softc *, struct disklabel *); 209 
static void vndgetdisklabel(dev_t, struct vnd_softc *); 210 211 static int vndlock(struct vnd_softc *); 212 static void vndunlock(struct vnd_softc *); 213 #ifdef VND_COMPRESSION 214 static void compstrategy(struct buf *, off_t); 215 static void *vnd_alloc(void *, u_int, u_int); 216 static void vnd_free(void *, void *); 217 #endif /* VND_COMPRESSION */ 218 219 static void vndthread(void *); 220 221 static dev_type_open(vndopen); 222 static dev_type_close(vndclose); 223 static dev_type_read(vndread); 224 static dev_type_write(vndwrite); 225 static dev_type_ioctl(vndioctl); 226 static dev_type_strategy(vndstrategy); 227 static dev_type_dump(vnddump); 228 static dev_type_size(vndsize); 229 230 const struct bdevsw vnd_bdevsw = { 231 vndopen, vndclose, vndstrategy, vndioctl, vnddump, vndsize, D_DISK 232 }; 233 234 const struct cdevsw vnd_cdevsw = { 235 vndopen, vndclose, vndread, vndwrite, vndioctl, 236 nostop, notty, nopoll, nommap, nokqfilter, D_DISK 237 }; 238 239 static int vnd_match(struct device *, struct cfdata *, void *); 240 static void vnd_attach(struct device *, struct device *, void *); 241 static int vnd_detach(struct device *, int); 242 243 CFATTACH_DECL(vnd, sizeof(struct vnd_softc), 244 vnd_match, vnd_attach, vnd_detach, NULL); 245 extern struct cfdriver vnd_cd; 246 247 static struct vnd_softc *vnd_spawn(int); 248 int vnd_destroy(struct device *); 249 250 void 251 vndattach(int num) 252 { 253 int error; 254 255 error = config_cfattach_attach(vnd_cd.cd_name, &vnd_ca); 256 if (error) 257 aprint_error("%s: unable to register cfattach\n", 258 vnd_cd.cd_name); 259 } 260 261 static int 262 vnd_match(struct device *self, struct cfdata *cfdata, void *aux) 263 { 264 return 1; 265 } 266 267 static void 268 vnd_attach(struct device *parent, struct device *self, void *aux) 269 { 270 struct vnd_softc *sc = (struct vnd_softc *)self; 271 272 sc->sc_comp_offsets = NULL; 273 sc->sc_comp_buff = NULL; 274 sc->sc_comp_decombuf = NULL; 275 bufq_alloc(&sc->sc_tab, "disksort", 
BUFQ_SORT_RAWBLOCK); 276 pseudo_disk_init(&sc->sc_dkdev); 277 } 278 279 static int 280 vnd_detach(struct device *self, int flags) 281 { 282 struct vnd_softc *sc = (struct vnd_softc *)self; 283 if (sc->sc_flags & VNF_INITED) 284 return EBUSY; 285 286 bufq_free(sc->sc_tab); 287 288 return 0; 289 } 290 291 static struct vnd_softc * 292 vnd_spawn(int unit) 293 { 294 struct cfdata *cf; 295 296 cf = malloc(sizeof(*cf), M_DEVBUF, M_WAITOK); 297 cf->cf_name = vnd_cd.cd_name; 298 cf->cf_atname = vnd_cd.cd_name; 299 cf->cf_unit = unit; 300 cf->cf_fstate = FSTATE_STAR; 301 302 return (struct vnd_softc *)config_attach_pseudo(cf); 303 } 304 305 int 306 vnd_destroy(struct device *dev) 307 { 308 int error; 309 struct cfdata *cf; 310 311 cf = device_cfdata(dev); 312 error = config_detach(dev, DETACH_QUIET); 313 if (error) 314 return error; 315 free(cf, M_DEVBUF); 316 return 0; 317 } 318 319 static int 320 vndopen(dev_t dev, int flags, int mode, struct lwp *l) 321 { 322 int unit = vndunit(dev); 323 struct vnd_softc *sc; 324 int error = 0, part, pmask; 325 struct disklabel *lp; 326 327 #ifdef DEBUG 328 if (vnddebug & VDB_FOLLOW) 329 printf("vndopen(0x%x, 0x%x, 0x%x, %p)\n", dev, flags, mode, l); 330 #endif 331 sc = device_lookup(&vnd_cd, unit); 332 if (sc == NULL) { 333 sc = vnd_spawn(unit); 334 if (sc == NULL) 335 return ENOMEM; 336 } 337 338 if ((error = vndlock(sc)) != 0) 339 return (error); 340 341 lp = sc->sc_dkdev.dk_label; 342 343 part = DISKPART(dev); 344 pmask = (1 << part); 345 346 /* 347 * If we're initialized, check to see if there are any other 348 * open partitions. If not, then it's safe to update the 349 * in-core disklabel. Only read the disklabel if it is 350 * not already valid. 351 */ 352 if ((sc->sc_flags & (VNF_INITED|VNF_VLABEL)) == VNF_INITED && 353 sc->sc_dkdev.dk_openmask == 0) 354 vndgetdisklabel(dev, sc); 355 356 /* Check that the partitions exists. 
*/ 357 if (part != RAW_PART) { 358 if (((sc->sc_flags & VNF_INITED) == 0) || 359 ((part >= lp->d_npartitions) || 360 (lp->d_partitions[part].p_fstype == FS_UNUSED))) { 361 error = ENXIO; 362 goto done; 363 } 364 } 365 366 /* Prevent our unit from being unconfigured while open. */ 367 switch (mode) { 368 case S_IFCHR: 369 sc->sc_dkdev.dk_copenmask |= pmask; 370 break; 371 372 case S_IFBLK: 373 sc->sc_dkdev.dk_bopenmask |= pmask; 374 break; 375 } 376 sc->sc_dkdev.dk_openmask = 377 sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask; 378 379 done: 380 vndunlock(sc); 381 return (error); 382 } 383 384 static int 385 vndclose(dev_t dev, int flags, int mode, struct lwp *l) 386 { 387 int unit = vndunit(dev); 388 struct vnd_softc *sc; 389 int error = 0, part; 390 391 #ifdef DEBUG 392 if (vnddebug & VDB_FOLLOW) 393 printf("vndclose(0x%x, 0x%x, 0x%x, %p)\n", dev, flags, mode, l); 394 #endif 395 sc = device_lookup(&vnd_cd, unit); 396 if (sc == NULL) 397 return ENXIO; 398 399 if ((error = vndlock(sc)) != 0) 400 return (error); 401 402 part = DISKPART(dev); 403 404 /* ...that much closer to allowing unconfiguration... */ 405 switch (mode) { 406 case S_IFCHR: 407 sc->sc_dkdev.dk_copenmask &= ~(1 << part); 408 break; 409 410 case S_IFBLK: 411 sc->sc_dkdev.dk_bopenmask &= ~(1 << part); 412 break; 413 } 414 sc->sc_dkdev.dk_openmask = 415 sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask; 416 417 vndunlock(sc); 418 419 if ((sc->sc_flags & VNF_INITED) == 0) { 420 if ((error = vnd_destroy((struct device *)sc)) != 0) { 421 aprint_error("%s: unable to detach instance\n", 422 sc->sc_dev.dv_xname); 423 return error; 424 } 425 } 426 427 return (0); 428 } 429 430 /* 431 * Queue the request, and wakeup the kernel thread to handle it. 
432 */ 433 static void 434 vndstrategy(struct buf *bp) 435 { 436 int unit = vndunit(bp->b_dev); 437 struct vnd_softc *vnd = 438 (struct vnd_softc *)device_lookup(&vnd_cd, unit); 439 struct disklabel *lp = vnd->sc_dkdev.dk_label; 440 daddr_t blkno; 441 int s = splbio(); 442 443 bp->b_resid = bp->b_bcount; 444 445 if ((vnd->sc_flags & VNF_INITED) == 0) { 446 bp->b_error = ENXIO; 447 bp->b_flags |= B_ERROR; 448 goto done; 449 } 450 451 /* 452 * The transfer must be a whole number of blocks. 453 */ 454 if ((bp->b_bcount % lp->d_secsize) != 0) { 455 bp->b_error = EINVAL; 456 bp->b_flags |= B_ERROR; 457 goto done; 458 } 459 460 /* 461 * check if we're read-only. 462 */ 463 if ((vnd->sc_flags & VNF_READONLY) && !(bp->b_flags & B_READ)) { 464 bp->b_error = EACCES; 465 bp->b_flags |= B_ERROR; 466 goto done; 467 } 468 469 /* 470 * Do bounds checking and adjust transfer. If there's an error, 471 * the bounds check will flag that for us. 472 */ 473 if (DISKPART(bp->b_dev) == RAW_PART) { 474 if (bounds_check_with_mediasize(bp, DEV_BSIZE, 475 vnd->sc_size) <= 0) 476 goto done; 477 } else { 478 if (bounds_check_with_label(&vnd->sc_dkdev, 479 bp, vnd->sc_flags & (VNF_WLABEL|VNF_LABELLING)) <= 0) 480 goto done; 481 } 482 483 /* If it's a nil transfer, wake up the top half now. */ 484 if (bp->b_bcount == 0) 485 goto done; 486 487 /* 488 * Put the block number in terms of the logical blocksize 489 * of the "device". 490 */ 491 492 blkno = bp->b_blkno / (lp->d_secsize / DEV_BSIZE); 493 494 /* 495 * Translate the partition-relative block number to an absolute. 
496 */ 497 if (DISKPART(bp->b_dev) != RAW_PART) { 498 struct partition *pp; 499 500 pp = &vnd->sc_dkdev.dk_label->d_partitions[ 501 DISKPART(bp->b_dev)]; 502 blkno += pp->p_offset; 503 } 504 bp->b_rawblkno = blkno; 505 506 #ifdef DEBUG 507 if (vnddebug & VDB_FOLLOW) 508 printf("vndstrategy(%p): unit %d\n", bp, unit); 509 #endif 510 BUFQ_PUT(vnd->sc_tab, bp); 511 wakeup(&vnd->sc_tab); 512 splx(s); 513 return; 514 done: 515 biodone(bp); 516 splx(s); 517 } 518 519 static void 520 vndthread(void *arg) 521 { 522 struct vnd_softc *vnd = arg; 523 struct mount *mp; 524 int s, bsize; 525 int sz, error; 526 struct disklabel *lp; 527 528 s = splbio(); 529 vnd->sc_flags |= VNF_KTHREAD; 530 wakeup(&vnd->sc_kthread); 531 532 /* 533 * Dequeue requests, break them into bsize pieces and submit using 534 * VOP_BMAP/VOP_STRATEGY. 535 */ 536 while ((vnd->sc_flags & VNF_VUNCONF) == 0) { 537 struct vndxfer *vnx; 538 off_t offset; 539 int resid; 540 int skipped = 0; 541 off_t bn; 542 int flags; 543 struct buf *obp; 544 struct buf *bp; 545 546 obp = BUFQ_GET(vnd->sc_tab); 547 if (obp == NULL) { 548 tsleep(&vnd->sc_tab, PRIBIO, "vndbp", 0); 549 continue; 550 }; 551 splx(s); 552 flags = obp->b_flags; 553 #ifdef DEBUG 554 if (vnddebug & VDB_FOLLOW) 555 printf("vndthread(%p\n", obp); 556 #endif 557 lp = vnd->sc_dkdev.dk_label; 558 559 /* convert to a byte offset within the file. 
*/ 560 bn = obp->b_rawblkno * lp->d_secsize; 561 562 if (vnd->sc_vp->v_mount == NULL) { 563 obp->b_error = ENXIO; 564 obp->b_flags |= B_ERROR; 565 goto done; 566 } 567 #ifdef VND_COMPRESSION 568 /* handle a compressed read */ 569 if ((flags & B_READ) != 0 && (vnd->sc_flags & VNF_COMP)) { 570 compstrategy(obp, bn); 571 goto done; 572 } 573 #endif /* VND_COMPRESSION */ 574 575 bsize = vnd->sc_vp->v_mount->mnt_stat.f_iosize; 576 577 /* 578 * Allocate a header for this transfer and link it to the 579 * buffer 580 */ 581 s = splbio(); 582 vnx = VND_GETXFER(vnd); 583 splx(s); 584 vnx->vx_vnd = vnd; 585 586 bp = &vnx->vx_buf; 587 BUF_INIT(bp); 588 bp->b_flags = (obp->b_flags & B_READ) | B_CALL; 589 bp->b_iodone = vndiodone; 590 bp->b_private = obp; 591 bp->b_vp = vnd->sc_vp; 592 bp->b_data = obp->b_data; 593 bp->b_bcount = bp->b_resid = obp->b_bcount; 594 BIO_COPYPRIO(bp, obp); 595 596 s = splbio(); 597 while (vnd->sc_active >= vnd->sc_maxactive) { 598 tsleep(&vnd->sc_tab, PRIBIO, "vndac", 0); 599 } 600 vnd->sc_active++; 601 splx(s); 602 603 if ((flags & B_READ) == 0) { 604 s = splbio(); 605 V_INCR_NUMOUTPUT(bp->b_vp); 606 splx(s); 607 608 vn_start_write(vnd->sc_vp, &mp, V_WAIT); 609 } 610 611 /* Instrumentation. */ 612 disk_busy(&vnd->sc_dkdev); 613 614 /* 615 * Feed requests sequentially. 616 * We do it this way to keep from flooding NFS servers if we 617 * are connected to an NFS file. This places the burden on 618 * the client rather than the server. 619 */ 620 error = 0; 621 for (offset = 0, resid = bp->b_resid; resid; 622 resid -= sz, offset += sz) { 623 struct buf *nbp; 624 struct vnode *vp; 625 daddr_t nbn; 626 int off, nra; 627 628 nra = 0; 629 vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY | LK_CANRECURSE); 630 error = VOP_BMAP(vnd->sc_vp, bn / bsize, &vp, &nbn, &nra); 631 VOP_UNLOCK(vnd->sc_vp, 0); 632 633 if (error == 0 && (long)nbn == -1) 634 error = EIO; 635 636 /* 637 * If there was an error or a hole in the file...punt. 
638 * Note that we may have to wait for any operations 639 * that we have already fired off before releasing 640 * the buffer. 641 * 642 * XXX we could deal with holes here but it would be 643 * a hassle (in the write case). 644 */ 645 if (error) { 646 skipped += resid; 647 break; 648 } 649 650 #ifdef DEBUG 651 if (!dovndcluster) 652 nra = 0; 653 #endif 654 655 off = bn % bsize; 656 sz = MIN(((off_t)1 + nra) * bsize - off, resid); 657 #ifdef DEBUG 658 if (vnddebug & VDB_IO) 659 printf("vndstrategy: vp %p/%p bn 0x%qx/0x%" PRIx64 660 " sz 0x%x\n", 661 vnd->sc_vp, vp, (long long)bn, nbn, sz); 662 #endif 663 664 nbp = getiobuf(); 665 nestiobuf_setup(bp, nbp, offset, sz); 666 nbp->b_blkno = nbn + btodb(off); 667 668 #if 0 /* XXX #ifdef DEBUG */ 669 if (vnddebug & VDB_IO) 670 printf("vndstart(%ld): bp %p vp %p blkno " 671 "0x%" PRIx64 " flags %x addr %p cnt 0x%x\n", 672 (long) (vnd-vnd_softc), &nbp->vb_buf, 673 nbp->vb_buf.b_vp, nbp->vb_buf.b_blkno, 674 nbp->vb_buf.b_flags, nbp->vb_buf.b_data, 675 nbp->vb_buf.b_bcount); 676 #endif 677 VOP_STRATEGY(vp, nbp); 678 bn += sz; 679 } 680 nestiobuf_done(bp, skipped, error); 681 682 if ((flags & B_READ) == 0) 683 vn_finished_write(mp, 0); 684 685 s = splbio(); 686 continue; 687 done: 688 biodone(obp); 689 s = splbio(); 690 } 691 692 vnd->sc_flags &= (~VNF_KTHREAD | VNF_VUNCONF); 693 wakeup(&vnd->sc_kthread); 694 splx(s); 695 kthread_exit(0); 696 } 697 698 static void 699 vndiodone(struct buf *bp) 700 { 701 struct vndxfer *vnx = VND_BUFTOXFER(bp); 702 struct vnd_softc *vnd = vnx->vx_vnd; 703 struct buf *obp = bp->b_private; 704 705 KASSERT(&vnx->vx_buf == bp); 706 KASSERT(vnd->sc_active > 0); 707 #ifdef DEBUG 708 if (vnddebug & VDB_IO) { 709 printf("vndiodone1: bp %p iodone: error %d\n", 710 bp, (bp->b_flags & B_ERROR) != 0 ? 
bp->b_error : 0); 711 } 712 #endif 713 disk_unbusy(&vnd->sc_dkdev, bp->b_bcount - bp->b_resid, 714 (bp->b_flags & B_READ)); 715 vnd->sc_active--; 716 if (vnd->sc_active == 0) { 717 wakeup(&vnd->sc_tab); 718 } 719 obp->b_flags |= bp->b_flags & B_ERROR; 720 obp->b_error = bp->b_error; 721 obp->b_resid = bp->b_resid; 722 VND_PUTXFER(vnd, vnx); 723 biodone(obp); 724 } 725 726 /* ARGSUSED */ 727 static int 728 vndread(dev_t dev, struct uio *uio, int flags) 729 { 730 int unit = vndunit(dev); 731 struct vnd_softc *sc; 732 733 #ifdef DEBUG 734 if (vnddebug & VDB_FOLLOW) 735 printf("vndread(0x%x, %p)\n", dev, uio); 736 #endif 737 738 sc = device_lookup(&vnd_cd, unit); 739 if (sc == NULL) 740 return ENXIO; 741 742 if ((sc->sc_flags & VNF_INITED) == 0) 743 return (ENXIO); 744 745 return (physio(vndstrategy, NULL, dev, B_READ, minphys, uio)); 746 } 747 748 /* ARGSUSED */ 749 static int 750 vndwrite(dev_t dev, struct uio *uio, int flags) 751 { 752 int unit = vndunit(dev); 753 struct vnd_softc *sc; 754 755 #ifdef DEBUG 756 if (vnddebug & VDB_FOLLOW) 757 printf("vndwrite(0x%x, %p)\n", dev, uio); 758 #endif 759 760 sc = device_lookup(&vnd_cd, unit); 761 if (sc == NULL) 762 return ENXIO; 763 764 if ((sc->sc_flags & VNF_INITED) == 0) 765 return (ENXIO); 766 767 return (physio(vndstrategy, NULL, dev, B_WRITE, minphys, uio)); 768 } 769 770 static int 771 vnd_cget(struct lwp *l, int unit, int *un, struct vattr *va) 772 { 773 struct vnd_softc *vnd; 774 775 if (*un == -1) 776 *un = unit; 777 if (*un < 0) 778 return EINVAL; 779 780 vnd = device_lookup(&vnd_cd, *un); 781 if (vnd == NULL) 782 return (*un >= vnd_cd.cd_ndevs) ? 
ENXIO : -1; 783 784 if ((vnd->sc_flags & VNF_INITED) == 0) 785 return -1; 786 787 return VOP_GETATTR(vnd->sc_vp, va, l->l_proc->p_cred, l); 788 } 789 790 /* ARGSUSED */ 791 static int 792 vndioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct lwp *l) 793 { 794 int unit = vndunit(dev); 795 struct vnd_softc *vnd; 796 struct vnd_ioctl *vio; 797 struct vattr vattr; 798 struct nameidata nd; 799 int error, part, pmask; 800 size_t geomsize; 801 struct proc *p = (l != NULL) ? l->l_proc : NULL; 802 int fflags; 803 #ifdef __HAVE_OLD_DISKLABEL 804 struct disklabel newlabel; 805 #endif 806 807 #ifdef DEBUG 808 if (vnddebug & VDB_FOLLOW) 809 printf("vndioctl(0x%x, 0x%lx, %p, 0x%x, %p): unit %d\n", 810 dev, cmd, data, flag, p, unit); 811 #endif 812 vnd = device_lookup(&vnd_cd, unit); 813 if (vnd == NULL && 814 #ifdef COMPAT_30 815 cmd != VNDIOOCGET && 816 #endif 817 cmd != VNDIOCGET) 818 return ENXIO; 819 vio = (struct vnd_ioctl *)data; 820 821 /* Must be open for writes for these commands... */ 822 switch (cmd) { 823 case VNDIOCSET: 824 case VNDIOCCLR: 825 case DIOCSDINFO: 826 case DIOCWDINFO: 827 #ifdef __HAVE_OLD_DISKLABEL 828 case ODIOCSDINFO: 829 case ODIOCWDINFO: 830 #endif 831 case DIOCKLABEL: 832 case DIOCWLABEL: 833 if ((flag & FWRITE) == 0) 834 return (EBADF); 835 } 836 837 /* Must be initialized for these... 
*/ 838 switch (cmd) { 839 case VNDIOCCLR: 840 case DIOCGDINFO: 841 case DIOCSDINFO: 842 case DIOCWDINFO: 843 case DIOCGPART: 844 case DIOCKLABEL: 845 case DIOCWLABEL: 846 case DIOCGDEFLABEL: 847 #ifdef __HAVE_OLD_DISKLABEL 848 case ODIOCGDINFO: 849 case ODIOCSDINFO: 850 case ODIOCWDINFO: 851 case ODIOCGDEFLABEL: 852 #endif 853 if ((vnd->sc_flags & VNF_INITED) == 0) 854 return (ENXIO); 855 } 856 857 switch (cmd) { 858 case VNDIOCSET: 859 if (vnd->sc_flags & VNF_INITED) 860 return (EBUSY); 861 862 if ((error = vndlock(vnd)) != 0) 863 return (error); 864 865 fflags = FREAD; 866 if ((vio->vnd_flags & VNDIOF_READONLY) == 0) 867 fflags |= FWRITE; 868 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, vio->vnd_file, l); 869 if ((error = vn_open(&nd, fflags, 0)) != 0) 870 goto unlock_and_exit; 871 KASSERT(l); 872 error = VOP_GETATTR(nd.ni_vp, &vattr, l->l_proc->p_cred, l); 873 if (!error && nd.ni_vp->v_type != VREG) 874 error = EOPNOTSUPP; 875 if (error) { 876 VOP_UNLOCK(nd.ni_vp, 0); 877 goto close_and_exit; 878 } 879 880 /* If using a compressed file, initialize its info */ 881 /* (or abort with an error if kernel has no compression) */ 882 if (vio->vnd_flags & VNF_COMP) { 883 #ifdef VND_COMPRESSION 884 struct vnd_comp_header *ch; 885 int i; 886 u_int32_t comp_size; 887 u_int32_t comp_maxsize; 888 889 /* allocate space for compresed file header */ 890 ch = malloc(sizeof(struct vnd_comp_header), 891 M_TEMP, M_WAITOK); 892 893 /* read compressed file header */ 894 error = vn_rdwr(UIO_READ, nd.ni_vp, (caddr_t)ch, 895 sizeof(struct vnd_comp_header), 0, UIO_SYSSPACE, 896 IO_UNIT|IO_NODELOCKED, p->p_cred, NULL, NULL); 897 if(error) { 898 free(ch, M_TEMP); 899 VOP_UNLOCK(nd.ni_vp, 0); 900 goto close_and_exit; 901 } 902 903 /* save some header info */ 904 vnd->sc_comp_blksz = ntohl(ch->block_size); 905 /* note last offset is the file byte size */ 906 vnd->sc_comp_numoffs = ntohl(ch->num_blocks)+1; 907 free(ch, M_TEMP); 908 if(vnd->sc_comp_blksz % DEV_BSIZE !=0) { 909 
VOP_UNLOCK(nd.ni_vp, 0); 910 error = EINVAL; 911 goto close_and_exit; 912 } 913 if(sizeof(struct vnd_comp_header) + 914 sizeof(u_int64_t) * vnd->sc_comp_numoffs > 915 vattr.va_size) { 916 VOP_UNLOCK(nd.ni_vp, 0); 917 error = EINVAL; 918 goto close_and_exit; 919 } 920 921 /* set decompressed file size */ 922 vattr.va_size = 923 (vnd->sc_comp_numoffs - 1) * vnd->sc_comp_blksz; 924 925 /* allocate space for all the compressed offsets */ 926 vnd->sc_comp_offsets = 927 malloc(sizeof(u_int64_t) * vnd->sc_comp_numoffs, 928 M_DEVBUF, M_WAITOK); 929 930 /* read in the offsets */ 931 error = vn_rdwr(UIO_READ, nd.ni_vp, 932 (caddr_t)vnd->sc_comp_offsets, 933 sizeof(u_int64_t) * vnd->sc_comp_numoffs, 934 sizeof(struct vnd_comp_header), UIO_SYSSPACE, 935 IO_UNIT|IO_NODELOCKED, p->p_cred, NULL, NULL); 936 if(error) { 937 VOP_UNLOCK(nd.ni_vp, 0); 938 goto close_and_exit; 939 } 940 /* 941 * find largest block size (used for allocation limit). 942 * Also convert offset to native byte order. 943 */ 944 comp_maxsize = 0; 945 for (i = 0; i < vnd->sc_comp_numoffs - 1; i++) { 946 vnd->sc_comp_offsets[i] = 947 be64toh(vnd->sc_comp_offsets[i]); 948 comp_size = be64toh(vnd->sc_comp_offsets[i + 1]) 949 - vnd->sc_comp_offsets[i]; 950 if (comp_size > comp_maxsize) 951 comp_maxsize = comp_size; 952 } 953 vnd->sc_comp_offsets[vnd->sc_comp_numoffs - 1] = 954 be64toh(vnd->sc_comp_offsets[vnd->sc_comp_numoffs - 1]); 955 956 /* create compressed data buffer */ 957 vnd->sc_comp_buff = malloc(comp_maxsize, 958 M_DEVBUF, M_WAITOK); 959 960 /* create decompressed buffer */ 961 vnd->sc_comp_decombuf = malloc(vnd->sc_comp_blksz, 962 M_DEVBUF, M_WAITOK); 963 vnd->sc_comp_buffblk = -1; 964 965 /* Initialize decompress stream */ 966 bzero(&vnd->sc_comp_stream, sizeof(z_stream)); 967 vnd->sc_comp_stream.zalloc = vnd_alloc; 968 vnd->sc_comp_stream.zfree = vnd_free; 969 error = inflateInit2(&vnd->sc_comp_stream, MAX_WBITS); 970 if(error) { 971 if(vnd->sc_comp_stream.msg) 972 printf("vnd%d: compressed file, 
%s\n", 973 unit, vnd->sc_comp_stream.msg); 974 VOP_UNLOCK(nd.ni_vp, 0); 975 error = EINVAL; 976 goto close_and_exit; 977 } 978 979 vnd->sc_flags |= VNF_COMP | VNF_READONLY; 980 #else /* !VND_COMPRESSION */ 981 VOP_UNLOCK(nd.ni_vp, 0); 982 error = EOPNOTSUPP; 983 goto close_and_exit; 984 #endif /* VND_COMPRESSION */ 985 } 986 987 VOP_UNLOCK(nd.ni_vp, 0); 988 vnd->sc_vp = nd.ni_vp; 989 vnd->sc_size = btodb(vattr.va_size); /* note truncation */ 990 991 /* 992 * Use pseudo-geometry specified. If none was provided, 993 * use "standard" Adaptec fictitious geometry. 994 */ 995 if (vio->vnd_flags & VNDIOF_HASGEOM) { 996 997 memcpy(&vnd->sc_geom, &vio->vnd_geom, 998 sizeof(vio->vnd_geom)); 999 1000 /* 1001 * Sanity-check the sector size. 1002 * XXX Don't allow secsize < DEV_BSIZE. Should 1003 * XXX we? 1004 */ 1005 if (vnd->sc_geom.vng_secsize < DEV_BSIZE || 1006 (vnd->sc_geom.vng_secsize % DEV_BSIZE) != 0 || 1007 vnd->sc_geom.vng_ncylinders == 0 || 1008 (vnd->sc_geom.vng_ntracks * 1009 vnd->sc_geom.vng_nsectors) == 0) { 1010 error = EINVAL; 1011 goto close_and_exit; 1012 } 1013 1014 /* 1015 * Compute the size (in DEV_BSIZE blocks) specified 1016 * by the geometry. 1017 */ 1018 geomsize = (vnd->sc_geom.vng_nsectors * 1019 vnd->sc_geom.vng_ntracks * 1020 vnd->sc_geom.vng_ncylinders) * 1021 (vnd->sc_geom.vng_secsize / DEV_BSIZE); 1022 1023 /* 1024 * Sanity-check the size against the specified 1025 * geometry. 1026 */ 1027 if (vnd->sc_size < geomsize) { 1028 error = EINVAL; 1029 goto close_and_exit; 1030 } 1031 } else if (vnd->sc_size >= (32 * 64)) { 1032 /* 1033 * Size must be at least 2048 DEV_BSIZE blocks 1034 * (1M) in order to use this geometry. 
1035 */ 1036 vnd->sc_geom.vng_secsize = DEV_BSIZE; 1037 vnd->sc_geom.vng_nsectors = 32; 1038 vnd->sc_geom.vng_ntracks = 64; 1039 vnd->sc_geom.vng_ncylinders = vnd->sc_size / (64 * 32); 1040 } else { 1041 vnd->sc_geom.vng_secsize = DEV_BSIZE; 1042 vnd->sc_geom.vng_nsectors = 1; 1043 vnd->sc_geom.vng_ntracks = 1; 1044 vnd->sc_geom.vng_ncylinders = vnd->sc_size; 1045 } 1046 1047 if (vio->vnd_flags & VNDIOF_READONLY) { 1048 vnd->sc_flags |= VNF_READONLY; 1049 } 1050 1051 if ((error = vndsetcred(vnd, p->p_cred)) != 0) 1052 goto close_and_exit; 1053 1054 vndthrottle(vnd, vnd->sc_vp); 1055 vio->vnd_size = dbtob(vnd->sc_size); 1056 vnd->sc_flags |= VNF_INITED; 1057 1058 /* create the kernel thread, wait for it to be up */ 1059 error = kthread_create1(vndthread, vnd, &vnd->sc_kthread, 1060 vnd->sc_dev.dv_xname); 1061 if (error) 1062 goto close_and_exit; 1063 while ((vnd->sc_flags & VNF_KTHREAD) == 0) { 1064 tsleep(&vnd->sc_kthread, PRIBIO, "vndthr", 0); 1065 } 1066 #ifdef DEBUG 1067 if (vnddebug & VDB_INIT) 1068 printf("vndioctl: SET vp %p size 0x%lx %d/%d/%d/%d\n", 1069 vnd->sc_vp, (unsigned long) vnd->sc_size, 1070 vnd->sc_geom.vng_secsize, 1071 vnd->sc_geom.vng_nsectors, 1072 vnd->sc_geom.vng_ntracks, 1073 vnd->sc_geom.vng_ncylinders); 1074 #endif 1075 1076 /* Attach the disk. */ 1077 vnd->sc_dkdev.dk_name = vnd->sc_dev.dv_xname; 1078 pseudo_disk_attach(&vnd->sc_dkdev); 1079 1080 /* Initialize the xfer and buffer pools. */ 1081 pool_init(&vnd->sc_vxpool, sizeof(struct vndxfer), 0, 1082 0, 0, "vndxpl", NULL); 1083 1084 /* Try and read the disklabel. 
*/ 1085 vndgetdisklabel(dev, vnd); 1086 1087 vndunlock(vnd); 1088 1089 break; 1090 1091 close_and_exit: 1092 (void) vn_close(nd.ni_vp, fflags, p->p_cred, l); 1093 unlock_and_exit: 1094 #ifdef VND_COMPRESSION 1095 /* free any allocated memory (for compressed file) */ 1096 if(vnd->sc_comp_offsets) { 1097 free(vnd->sc_comp_offsets, M_DEVBUF); 1098 vnd->sc_comp_offsets = NULL; 1099 } 1100 if(vnd->sc_comp_buff) { 1101 free(vnd->sc_comp_buff, M_DEVBUF); 1102 vnd->sc_comp_buff = NULL; 1103 } 1104 if(vnd->sc_comp_decombuf) { 1105 free(vnd->sc_comp_decombuf, M_DEVBUF); 1106 vnd->sc_comp_decombuf = NULL; 1107 } 1108 #endif /* VND_COMPRESSION */ 1109 vndunlock(vnd); 1110 return (error); 1111 1112 case VNDIOCCLR: 1113 if ((error = vndlock(vnd)) != 0) 1114 return (error); 1115 1116 /* 1117 * Don't unconfigure if any other partitions are open 1118 * or if both the character and block flavors of this 1119 * partition are open. 1120 */ 1121 part = DISKPART(dev); 1122 pmask = (1 << part); 1123 if (((vnd->sc_dkdev.dk_openmask & ~pmask) || 1124 ((vnd->sc_dkdev.dk_bopenmask & pmask) && 1125 (vnd->sc_dkdev.dk_copenmask & pmask))) && 1126 !(vio->vnd_flags & VNDIOF_FORCE)) { 1127 vndunlock(vnd); 1128 return (EBUSY); 1129 } 1130 1131 /* 1132 * XXX vndclear() might call vndclose() implicitely; 1133 * release lock to avoid recursion 1134 */ 1135 vndunlock(vnd); 1136 vndclear(vnd, minor(dev)); 1137 #ifdef DEBUG 1138 if (vnddebug & VDB_INIT) 1139 printf("vndioctl: CLRed\n"); 1140 #endif 1141 1142 /* Destroy the xfer and buffer pools. */ 1143 pool_destroy(&vnd->sc_vxpool); 1144 1145 /* Detatch the disk. 
*/ 1146 pseudo_disk_detach(&vnd->sc_dkdev); 1147 break; 1148 1149 #ifdef COMPAT_30 1150 case VNDIOOCGET: { 1151 struct vnd_ouser *vnu; 1152 struct vattr va; 1153 vnu = (struct vnd_ouser *)data; 1154 KASSERT(l); 1155 switch (error = vnd_cget(l, unit, &vnu->vnu_unit, &va)) { 1156 case 0: 1157 vnu->vnu_dev = va.va_fsid; 1158 vnu->vnu_ino = va.va_fileid; 1159 break; 1160 case -1: 1161 /* unused is not an error */ 1162 vnu->vnu_dev = 0; 1163 vnu->vnu_ino = 0; 1164 break; 1165 default: 1166 return error; 1167 } 1168 break; 1169 } 1170 #endif 1171 case VNDIOCGET: { 1172 struct vnd_user *vnu; 1173 struct vattr va; 1174 vnu = (struct vnd_user *)data; 1175 KASSERT(l); 1176 switch (error = vnd_cget(l, unit, &vnu->vnu_unit, &va)) { 1177 case 0: 1178 vnu->vnu_dev = va.va_fsid; 1179 vnu->vnu_ino = va.va_fileid; 1180 break; 1181 case -1: 1182 /* unused is not an error */ 1183 vnu->vnu_dev = 0; 1184 vnu->vnu_ino = 0; 1185 break; 1186 default: 1187 return error; 1188 } 1189 break; 1190 } 1191 1192 case DIOCGDINFO: 1193 *(struct disklabel *)data = *(vnd->sc_dkdev.dk_label); 1194 break; 1195 1196 #ifdef __HAVE_OLD_DISKLABEL 1197 case ODIOCGDINFO: 1198 newlabel = *(vnd->sc_dkdev.dk_label); 1199 if (newlabel.d_npartitions > OLDMAXPARTITIONS) 1200 return ENOTTY; 1201 memcpy(data, &newlabel, sizeof (struct olddisklabel)); 1202 break; 1203 #endif 1204 1205 case DIOCGPART: 1206 ((struct partinfo *)data)->disklab = vnd->sc_dkdev.dk_label; 1207 ((struct partinfo *)data)->part = 1208 &vnd->sc_dkdev.dk_label->d_partitions[DISKPART(dev)]; 1209 break; 1210 1211 case DIOCWDINFO: 1212 case DIOCSDINFO: 1213 #ifdef __HAVE_OLD_DISKLABEL 1214 case ODIOCWDINFO: 1215 case ODIOCSDINFO: 1216 #endif 1217 { 1218 struct disklabel *lp; 1219 1220 if ((error = vndlock(vnd)) != 0) 1221 return (error); 1222 1223 vnd->sc_flags |= VNF_LABELLING; 1224 1225 #ifdef __HAVE_OLD_DISKLABEL 1226 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) { 1227 memset(&newlabel, 0, sizeof newlabel); 1228 memcpy(&newlabel, data, sizeof 
(struct olddisklabel)); 1229 lp = &newlabel; 1230 } else 1231 #endif 1232 lp = (struct disklabel *)data; 1233 1234 error = setdisklabel(vnd->sc_dkdev.dk_label, 1235 lp, 0, vnd->sc_dkdev.dk_cpulabel); 1236 if (error == 0) { 1237 if (cmd == DIOCWDINFO 1238 #ifdef __HAVE_OLD_DISKLABEL 1239 || cmd == ODIOCWDINFO 1240 #endif 1241 ) 1242 error = writedisklabel(VNDLABELDEV(dev), 1243 vndstrategy, vnd->sc_dkdev.dk_label, 1244 vnd->sc_dkdev.dk_cpulabel); 1245 } 1246 1247 vnd->sc_flags &= ~VNF_LABELLING; 1248 1249 vndunlock(vnd); 1250 1251 if (error) 1252 return (error); 1253 break; 1254 } 1255 1256 case DIOCKLABEL: 1257 if (*(int *)data != 0) 1258 vnd->sc_flags |= VNF_KLABEL; 1259 else 1260 vnd->sc_flags &= ~VNF_KLABEL; 1261 break; 1262 1263 case DIOCWLABEL: 1264 if (*(int *)data != 0) 1265 vnd->sc_flags |= VNF_WLABEL; 1266 else 1267 vnd->sc_flags &= ~VNF_WLABEL; 1268 break; 1269 1270 case DIOCGDEFLABEL: 1271 vndgetdefaultlabel(vnd, (struct disklabel *)data); 1272 break; 1273 1274 #ifdef __HAVE_OLD_DISKLABEL 1275 case ODIOCGDEFLABEL: 1276 vndgetdefaultlabel(vnd, &newlabel); 1277 if (newlabel.d_npartitions > OLDMAXPARTITIONS) 1278 return ENOTTY; 1279 memcpy(data, &newlabel, sizeof (struct olddisklabel)); 1280 break; 1281 #endif 1282 1283 default: 1284 return (ENOTTY); 1285 } 1286 1287 return (0); 1288 } 1289 1290 /* 1291 * Duplicate the current processes' credentials. Since we are called only 1292 * as the result of a SET ioctl and only root can do that, any future access 1293 * to this "disk" is essentially as root. Note that credentials may change 1294 * if some other uid can write directly to the mapped file (NFS). 
 */
static int
vndsetcred(struct vnd_softc *vnd, kauth_cred_t cred)
{
	struct uio auio;
	struct iovec aiov;
	char *tmpbuf;
	int error;

	vnd->sc_cred = kauth_cred_dup(cred);
	tmpbuf = malloc(DEV_BSIZE, M_TEMP, M_WAITOK);

	/* XXX: Horrible kludge to establish credentials for NFS */
	aiov.iov_base = tmpbuf;
	aiov.iov_len = min(DEV_BSIZE, dbtob(vnd->sc_size));
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = 0;
	auio.uio_rw = UIO_READ;
	auio.uio_resid = aiov.iov_len;
	UIO_SETUP_SYSSPACE(&auio);
	vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY);
	error = VOP_READ(vnd->sc_vp, &auio, 0, vnd->sc_cred);
	if (error == 0) {
		/*
		 * Because vnd does all IO directly through the vnode
		 * we need to flush (at least) the buffer from the above
		 * VOP_READ from the buffer cache to prevent cache
		 * incoherencies.  Also, be careful to write dirty
		 * buffers back to stable storage.
		 */
		error = vinvalbuf(vnd->sc_vp, V_SAVE, vnd->sc_cred,
		    curlwp, 0, 0);
	}
	VOP_UNLOCK(vnd->sc_vp, 0);

	free(tmpbuf, M_TEMP);
	return (error);
}

/*
 * Set maxactive based on FS type
 */
static void
vndthrottle(struct vnd_softc *vnd, struct vnode *vp)
{
#ifdef NFS
	extern int (**nfsv2_vnodeop_p)(void *);

	/* NFS backing files get a lower concurrency limit. */
	if (vp->v_op == nfsv2_vnodeop_p)
		vnd->sc_maxactive = 2;
	else
#endif
		vnd->sc_maxactive = 8;

	if (vnd->sc_maxactive < 1)
		vnd->sc_maxactive = 1;
}

#if 0	/* currently unused; see vndclear() for per-unit teardown */
static void
vndshutdown(void)
{
	struct vnd_softc *vnd;

	for (vnd = &vnd_softc[0]; vnd < &vnd_softc[numvnd]; vnd++)
		if (vnd->sc_flags & VNF_INITED)
			vndclear(vnd);
}
#endif

/*
 * Unconfigure a unit: revoke open vnodes, drain the buffer queue,
 * stop the worker kthread, release compression state, and close the
 * backing vnode.  `myminor' identifies the caller's own device so its
 * character vnode is not revoked out from under it.
 */
static void
vndclear(struct vnd_softc *vnd, int myminor)
{
	struct vnode *vp = vnd->sc_vp;
	struct lwp *l = curlwp;
	int fflags = FREAD;
	int bmaj, cmaj, i, mn;
	int s;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndclear(%p): vp %p\n", vnd, vp);
#endif
	/* locate the major number */
	bmaj = bdevsw_lookup_major(&vnd_bdevsw);
	cmaj = cdevsw_lookup_major(&vnd_cdevsw);

	/* Nuke the vnodes for any open instances */
	for (i = 0; i < MAXPARTITIONS; i++) {
		mn = DISKMINOR(device_unit(&vnd->sc_dev), i);
		vdevgone(bmaj, mn, mn, VBLK);
		if (mn != myminor) /* XXX avoid to kill own vnode */
			vdevgone(cmaj, mn, mn, VCHR);
	}

	if ((vnd->sc_flags & VNF_READONLY) == 0)
		fflags |= FWRITE;

	/* Discard any pending I/O at splbio so the queue can't refill. */
	s = splbio();
	bufq_drain(vnd->sc_tab);
	splx(s);

	/* Ask the kthread to exit and wait until it has done so. */
	vnd->sc_flags |= VNF_VUNCONF;
	wakeup(&vnd->sc_tab);
	while (vnd->sc_flags & VNF_KTHREAD)
		tsleep(&vnd->sc_kthread, PRIBIO, "vnthr", 0);

#ifdef VND_COMPRESSION
	/* free the compressed file buffers */
	if(vnd->sc_flags & VNF_COMP) {
		if(vnd->sc_comp_offsets) {
			free(vnd->sc_comp_offsets, M_DEVBUF);
			vnd->sc_comp_offsets = NULL;
		}
		if(vnd->sc_comp_buff) {
			free(vnd->sc_comp_buff, M_DEVBUF);
			vnd->sc_comp_buff = NULL;
		}
		if(vnd->sc_comp_decombuf) {
			free(vnd->sc_comp_decombuf, M_DEVBUF);
			vnd->sc_comp_decombuf = NULL;
		}
	}
#endif /* VND_COMPRESSION */
	vnd->sc_flags &=
	    ~(VNF_INITED | VNF_READONLY | VNF_VLABEL
	      | VNF_VUNCONF | VNF_COMP);
	if (vp == (struct vnode *)0)
		panic("vndclear: null vp");
	(void) vn_close(vp, fflags, vnd->sc_cred, l);
	kauth_cred_free(vnd->sc_cred);
	vnd->sc_vp = (struct vnode *)0;
	vnd->sc_cred = (kauth_cred_t)0;
	vnd->sc_size = 0;
}

/*
 * Return the size (in DEV_BSIZE blocks) of the named partition, or -1
 * if the unit is unconfigured or the partition is not FS_SWAP (this is
 * the d_psize entry point, used when dumping/swapping to the device).
 */
static int
vndsize(dev_t dev)
{
	struct vnd_softc *sc;
	struct disklabel *lp;
	int part, unit, omask;
	int size;

	unit = vndunit(dev);
	sc = (struct vnd_softc *)device_lookup(&vnd_cd, unit);
	if (sc == NULL)
		return -1;

	if ((sc->sc_flags & VNF_INITED) == 0)
		return (-1);

	part = DISKPART(dev);
	omask = sc->sc_dkdev.dk_openmask & (1 << part);
	lp = sc->sc_dkdev.dk_label;

	/* Temporarily open the partition if nobody else has it open. */
	if (omask == 0 && vndopen(dev, 0, S_IFBLK, curlwp))	/* XXX */
		return (-1);

	if (lp->d_partitions[part].p_fstype != FS_SWAP)
		size = -1;
	else
		size = lp->d_partitions[part].p_size *
		    (lp->d_secsize / DEV_BSIZE);

	if (omask == 0 && vndclose(dev, 0, S_IFBLK, curlwp))	/* XXX */
		return (-1);

	return (size);
}

/*
 * Crash-dump entry point: dumping to a vnd is not supported.
 */
static int
vnddump(dev_t dev, daddr_t blkno, caddr_t va, size_t size)
{

	/* Not implemented. */
	return ENXIO;
}

/*
 * Build a default in-core disklabel from the unit's synthetic geometry:
 * a single RAW_PART partition covering the whole virtual disk.
 */
static void
vndgetdefaultlabel(struct vnd_softc *sc, struct disklabel *lp)
{
	struct vndgeom *vng = &sc->sc_geom;
	struct partition *pp;

	memset(lp, 0, sizeof(*lp));

	/* sc_size is in DEV_BSIZE units; convert to sectors. */
	lp->d_secperunit = sc->sc_size / (vng->vng_secsize / DEV_BSIZE);
	lp->d_secsize = vng->vng_secsize;
	lp->d_nsectors = vng->vng_nsectors;
	lp->d_ntracks = vng->vng_ntracks;
	lp->d_ncylinders = vng->vng_ncylinders;
	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;

	strncpy(lp->d_typename, "vnd", sizeof(lp->d_typename));
	lp->d_type = DTYPE_VND;
	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
	lp->d_rpm = 3600;
	lp->d_interleave = 1;
	lp->d_flags = 0;

	pp = &lp->d_partitions[RAW_PART];
	pp->p_offset = 0;
	pp->p_size = lp->d_secperunit;
	pp->p_fstype = FS_UNUSED;
	lp->d_npartitions = RAW_PART + 1;

	lp->d_magic = DISKMAGIC;
	lp->d_magic2 = DISKMAGIC;
	lp->d_checksum = dkcksum(lp);
}

/*
 * Read the disklabel from a vnd.  If one is not present, create a fake one.
 */
static void
vndgetdisklabel(dev_t dev, struct vnd_softc *sc)
{
	const char *errstring;
	struct disklabel *lp = sc->sc_dkdev.dk_label;
	struct cpu_disklabel *clp = sc->sc_dkdev.dk_cpulabel;
	int i;

	memset(clp, 0, sizeof(*clp));

	vndgetdefaultlabel(sc, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(VNDLABELDEV(dev), vndstrategy, lp, clp);
	if (errstring) {
		/*
		 * Lack of disklabel is common, but we print the warning
		 * anyway, since it might contain other useful information.
		 */
		printf("%s: %s\n", sc->sc_dev.dv_xname, errstring);

		/*
		 * For historical reasons, if there's no disklabel
		 * present, all partitions must be FS_BSDFFS and
		 * occupy the entire disk.
		 */
		for (i = 0; i < MAXPARTITIONS; i++) {
			/*
			 * Don't wipe out port specific hack (such as
			 * dos partition hack of i386 port).
			 */
			if (lp->d_partitions[i].p_size != 0)
				continue;

			lp->d_partitions[i].p_size = lp->d_secperunit;
			lp->d_partitions[i].p_offset = 0;
			lp->d_partitions[i].p_fstype = FS_BSDFFS;
		}

		strncpy(lp->d_packname, "default label",
		    sizeof(lp->d_packname));

		lp->d_npartitions = MAXPARTITIONS;
		lp->d_checksum = dkcksum(lp);
	}

	/* In-core label now valid. */
	sc->sc_flags |= VNF_VLABEL;
}

/*
 * Wait interruptibly for an exclusive lock.
 *
 * XXX
 * Several drivers do this; it should be abstracted and made MP-safe.
 */
static int
vndlock(struct vnd_softc *sc)
{
	int error;

	while ((sc->sc_flags & VNF_LOCKED) != 0) {
		sc->sc_flags |= VNF_WANTED;
		/* PCATCH: allow the sleep to be interrupted by a signal. */
		if ((error = tsleep(sc, PRIBIO | PCATCH, "vndlck", 0)) != 0)
			return (error);
	}
	sc->sc_flags |= VNF_LOCKED;
	return (0);
}

/*
 * Unlock and wake up any waiters.
1585 */ 1586 static void 1587 vndunlock(struct vnd_softc *sc) 1588 { 1589 1590 sc->sc_flags &= ~VNF_LOCKED; 1591 if ((sc->sc_flags & VNF_WANTED) != 0) { 1592 sc->sc_flags &= ~VNF_WANTED; 1593 wakeup(sc); 1594 } 1595 } 1596 1597 #ifdef VND_COMPRESSION 1598 /* compressed file read */ 1599 static void 1600 compstrategy(struct buf *bp, off_t bn) 1601 { 1602 int error; 1603 int unit = vndunit(bp->b_dev); 1604 struct vnd_softc *vnd = 1605 (struct vnd_softc *)device_lookup(&vnd_cd, unit); 1606 u_int32_t comp_block; 1607 struct uio auio; 1608 caddr_t addr; 1609 int s; 1610 1611 /* set up constants for data move */ 1612 auio.uio_rw = UIO_READ; 1613 UIO_SETUP_SYSSPACE(&auio); 1614 1615 /* read, and transfer the data */ 1616 addr = bp->b_data; 1617 s = splbio(); 1618 while (bp->b_resid > 0) { 1619 unsigned length; 1620 size_t length_in_buffer; 1621 u_int32_t offset_in_buffer; 1622 struct iovec aiov; 1623 1624 /* calculate the compressed block number */ 1625 comp_block = bn / (off_t)vnd->sc_comp_blksz; 1626 1627 /* check for good block number */ 1628 if (comp_block >= vnd->sc_comp_numoffs) { 1629 bp->b_error = EINVAL; 1630 bp->b_flags |= B_ERROR; 1631 splx(s); 1632 return; 1633 } 1634 1635 /* read in the compressed block, if not in buffer */ 1636 if (comp_block != vnd->sc_comp_buffblk) { 1637 length = vnd->sc_comp_offsets[comp_block + 1] - 1638 vnd->sc_comp_offsets[comp_block]; 1639 vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY); 1640 error = vn_rdwr(UIO_READ, vnd->sc_vp, vnd->sc_comp_buff, 1641 length, vnd->sc_comp_offsets[comp_block], 1642 UIO_SYSSPACE, IO_UNIT, vnd->sc_cred, NULL, NULL); 1643 if (error) { 1644 bp->b_error = error; 1645 bp->b_flags |= B_ERROR; 1646 VOP_UNLOCK(vnd->sc_vp, 0); 1647 splx(s); 1648 return; 1649 } 1650 /* uncompress the buffer */ 1651 vnd->sc_comp_stream.next_in = vnd->sc_comp_buff; 1652 vnd->sc_comp_stream.avail_in = length; 1653 vnd->sc_comp_stream.next_out = vnd->sc_comp_decombuf; 1654 vnd->sc_comp_stream.avail_out = vnd->sc_comp_blksz; 1655 
inflateReset(&vnd->sc_comp_stream); 1656 error = inflate(&vnd->sc_comp_stream, Z_FINISH); 1657 if (error != Z_STREAM_END) { 1658 if (vnd->sc_comp_stream.msg) 1659 printf("%s: compressed file, %s\n", 1660 vnd->sc_dev.dv_xname, 1661 vnd->sc_comp_stream.msg); 1662 bp->b_error = EBADMSG; 1663 bp->b_flags |= B_ERROR; 1664 VOP_UNLOCK(vnd->sc_vp, 0); 1665 splx(s); 1666 return; 1667 } 1668 vnd->sc_comp_buffblk = comp_block; 1669 VOP_UNLOCK(vnd->sc_vp, 0); 1670 } 1671 1672 /* transfer the usable uncompressed data */ 1673 offset_in_buffer = bn % (off_t)vnd->sc_comp_blksz; 1674 length_in_buffer = vnd->sc_comp_blksz - offset_in_buffer; 1675 if (length_in_buffer > bp->b_resid) 1676 length_in_buffer = bp->b_resid; 1677 auio.uio_iov = &aiov; 1678 auio.uio_iovcnt = 1; 1679 aiov.iov_base = addr; 1680 aiov.iov_len = length_in_buffer; 1681 auio.uio_resid = aiov.iov_len; 1682 auio.uio_offset = 0; 1683 error = uiomove(vnd->sc_comp_decombuf + offset_in_buffer, 1684 length_in_buffer, &auio); 1685 if (error) { 1686 bp->b_error = error; 1687 bp->b_flags |= B_ERROR; 1688 splx(s); 1689 return; 1690 } 1691 1692 bn += length_in_buffer; 1693 addr += length_in_buffer; 1694 bp->b_resid -= length_in_buffer; 1695 } 1696 splx(s); 1697 } 1698 1699 /* compression memory allocation routines */ 1700 static void * 1701 vnd_alloc(void *aux, u_int items, u_int siz) 1702 { 1703 return malloc(items * siz, M_TEMP, M_NOWAIT); 1704 } 1705 1706 static void 1707 vnd_free(void *aux, void *ptr) 1708 { 1709 free(ptr, M_TEMP); 1710 } 1711 #endif /* VND_COMPRESSION */ 1712