/*	$NetBSD: vnd.c,v 1.129 2006/01/11 00:50:29 yamt Exp $	*/

/*-
 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vn.c 1.13 94/04/02$
 *
 *	@(#)vn.c	8.9 (Berkeley) 5/14/95
 */

/*
 * Copyright (c) 1988 University of Utah.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vn.c 1.13 94/04/02$
 *
 *	@(#)vn.c	8.9 (Berkeley) 5/14/95
 */

/*
 * Vnode disk driver.
 *
 * Block/character interface to a vnode.  Allows one to treat a file
 * as a disk (e.g. build a filesystem in it, mount it, etc.).
 *
 * NOTE 1: This uses the VOP_BMAP/VOP_STRATEGY interface to the vnode
 * instead of a simple VOP_RDWR.  We do this to avoid distorting the
 * local buffer cache.
 *
 * NOTE 2: There is a security issue involved with this driver.
 * Once mounted all access to the contents of the "mapped" file via
 * the special file is controlled by the permissions on the special
 * file, the protection of the mapped file is ignored (effectively,
 * by using root credentials in all transactions).
 *
 * NOTE 3: Doesn't interact with leases, should it?
 */
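
/*
 * Example usage (illustrative sketch, not part of the driver): a vnd
 * unit is normally configured from userland with vnconfig(8), which
 * opens the raw vnd device and issues the VNDIOCSET ioctl handled
 * below, e.g.:
 *
 *	vnconfig vnd0 /var/tmp/diskimage
 *	mount /dev/vnd0a /mnt
 *
 * Roughly the same thing from a C program (error handling omitted;
 * the ioctl structure is declared in <dev/vndvar.h>, and the raw
 * partition letter varies by port):
 *
 *	struct vnd_ioctl vio;
 *	int fd = open("/dev/rvnd0d", O_RDWR);
 *	memset(&vio, 0, sizeof(vio));
 *	vio.vnd_file = "/var/tmp/diskimage";
 *	ioctl(fd, VNDIOCSET, &vio);
 */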

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vnd.c,v 1.129 2006/01/11 00:50:29 yamt Exp $");

#if defined(_KERNEL_OPT)
#include "fs_nfs.h"
#include "opt_vnd.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/proc.h>
#include <sys/kthread.h>
#include <sys/errno.h>
#include <sys/buf.h>
#include <sys/bufq.h>
#include <sys/malloc.h>
#include <sys/ioctl.h>
#include <sys/disklabel.h>
#include <sys/device.h>
#include <sys/disk.h>
#include <sys/stat.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/uio.h>
#include <sys/conf.h>
#include <net/zlib.h>

#include <miscfs/specfs/specdev.h>

#include <dev/vndvar.h>

#if defined(VNDDEBUG) && !defined(DEBUG)
#define DEBUG
#endif

#ifdef DEBUG
int dovndcluster = 1;
#define VDB_FOLLOW	0x01
#define VDB_INIT	0x02
#define VDB_IO		0x04
#define VDB_LABEL	0x08
int vnddebug = 0x00;
#endif

#define vndunit(x)	DISKUNIT(x)

struct vndxfer {
	struct buf vx_buf;
	struct vnd_softc *vx_vnd;
};
#define VND_BUFTOXFER(bp)	((struct vndxfer *)(void *)bp)

#define VND_GETXFER(vnd)	pool_get(&(vnd)->sc_vxpool, PR_WAITOK)
#define VND_PUTXFER(vnd, vx)	pool_put(&(vnd)->sc_vxpool, (vx))

struct vnd_softc *vnd_softc;
int numvnd = 0;

#define VNDLABELDEV(dev) \
    (MAKEDISKDEV(major((dev)), vndunit((dev)), RAW_PART))

/* called by main() at boot time (XXX: and the LKM driver) */
void	vndattach(int);
int	vnddetach(void);

static void	vndclear(struct vnd_softc *, int);
static int	vndsetcred(struct vnd_softc *, struct ucred *);
static void	vndthrottle(struct vnd_softc *, struct vnode *);
static void	vndiodone(struct buf *);
#if 0
static void	vndshutdown(void);
#endif

static void	vndgetdefaultlabel(struct vnd_softc *, struct disklabel *);
static void	vndgetdisklabel(dev_t);

static int	vndlock(struct vnd_softc *);
static void	vndunlock(struct vnd_softc *);
#ifdef VND_COMPRESSION
static void	compstrategy(struct buf *, off_t);
static void	*vnd_alloc(void *, u_int, u_int);
static void	vnd_free(void *, void *);
#endif /* VND_COMPRESSION */

static void	vndthread(void *);

static dev_type_open(vndopen);
static dev_type_close(vndclose);
static dev_type_read(vndread);
static dev_type_write(vndwrite);
static dev_type_ioctl(vndioctl);
static dev_type_strategy(vndstrategy);
static dev_type_dump(vnddump);
static dev_type_size(vndsize);

const struct bdevsw vnd_bdevsw = {
	vndopen, vndclose, vndstrategy, vndioctl, vnddump, vndsize, D_DISK
};

const struct cdevsw vnd_cdevsw = {
	vndopen, vndclose, vndread, vndwrite, vndioctl,
	nostop, notty, nopoll, nommap, nokqfilter, D_DISK
};

static int	vndattached;
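
/*
 * vndattach() runs at boot with the unit count from the kernel
 * configuration (e.g. "pseudo-device vnd 4"); it only allocates and
 * initializes the softc array and per-unit buffer queues.  A unit has
 * no backing file until a VNDIOCSET ioctl (see vndioctl() below)
 * associates one with it.
 */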
void
vndattach(int num)
{
	int i;
	char *mem;

	if (vndattached)
		return;
	vndattached = 1;
	if (num <= 0)
		return;
	i = num * sizeof(struct vnd_softc);
	mem = malloc(i, M_DEVBUF, M_NOWAIT|M_ZERO);
	if (mem == NULL) {
		printf("WARNING: no memory for vnode disks\n");
		return;
	}
	vnd_softc = (struct vnd_softc *)mem;
	numvnd = num;

	for (i = 0; i < numvnd; i++) {
		vnd_softc[i].sc_unit = i;
		vnd_softc[i].sc_comp_offsets = NULL;
		vnd_softc[i].sc_comp_buff = NULL;
		vnd_softc[i].sc_comp_decombuf = NULL;
		bufq_alloc(&vnd_softc[i].sc_tab,
		    "disksort", BUFQ_SORT_RAWBLOCK);
		pseudo_disk_init(&vnd_softc[i].sc_dkdev);
	}
}

int
vnddetach(void)
{
	int i;

	/* First check we aren't in use. */
	for (i = 0; i < numvnd; i++)
		if (vnd_softc[i].sc_flags & VNF_INITED)
			return (EBUSY);

	for (i = 0; i < numvnd; i++)
		bufq_free(vnd_softc[i].sc_tab);

	free(vnd_softc, M_DEVBUF);
	vndattached = 0;

	return (0);
}

static int
vndopen(dev_t dev, int flags, int mode, struct lwp *l)
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;
	int error = 0, part, pmask;
	struct disklabel *lp;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndopen(0x%x, 0x%x, 0x%x, %p)\n", dev, flags, mode, l);
#endif
	if (unit >= numvnd)
		return (ENXIO);
	sc = &vnd_softc[unit];

	if ((error = vndlock(sc)) != 0)
		return (error);

	lp = sc->sc_dkdev.dk_label;

	part = DISKPART(dev);
	pmask = (1 << part);

	/*
	 * If we're initialized, check to see if there are any other
	 * open partitions.  If not, then it's safe to update the
	 * in-core disklabel.  Only read the disklabel if it is
	 * not already valid.
	 */
	if ((sc->sc_flags & (VNF_INITED|VNF_VLABEL)) == VNF_INITED &&
	    sc->sc_dkdev.dk_openmask == 0)
		vndgetdisklabel(dev);

	/* Check that the partition exists. */
	if (part != RAW_PART) {
		if (((sc->sc_flags & VNF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
		     (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			goto done;
		}
	}

	/* Prevent our unit from being unconfigured while open. */
	switch (mode) {
	case S_IFCHR:
		sc->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		sc->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}
	sc->sc_dkdev.dk_openmask =
	    sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask;

 done:
	vndunlock(sc);
	return (error);
}

static int
vndclose(dev_t dev, int flags, int mode, struct lwp *l)
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;
	int error = 0, part;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndclose(0x%x, 0x%x, 0x%x, %p)\n", dev, flags, mode, l);
#endif

	if (unit >= numvnd)
		return (ENXIO);
	sc = &vnd_softc[unit];

	if ((error = vndlock(sc)) != 0)
		return (error);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (mode) {
	case S_IFCHR:
		sc->sc_dkdev.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		sc->sc_dkdev.dk_bopenmask &= ~(1 << part);
		break;
	}
	sc->sc_dkdev.dk_openmask =
	    sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask;

	vndunlock(sc);
	return (0);
}

/*
 * Queue the request, and wakeup the kernel thread to handle it.
 */
static void
vndstrategy(struct buf *bp)
{
	int unit = vndunit(bp->b_dev);
	struct vnd_softc *vnd = &vnd_softc[unit];
	struct disklabel *lp = vnd->sc_dkdev.dk_label;
	daddr_t blkno;
	int s = splbio();

	bp->b_resid = bp->b_bcount;

	if ((vnd->sc_flags & VNF_INITED) == 0) {
		bp->b_error = ENXIO;
		bp->b_flags |= B_ERROR;
		goto done;
	}

	/*
	 * The transfer must be a whole number of blocks.
	 */
	if ((bp->b_bcount % lp->d_secsize) != 0) {
		bp->b_error = EINVAL;
		bp->b_flags |= B_ERROR;
		goto done;
	}

	/*
	 * check if we're read-only.
	 */
	if ((vnd->sc_flags & VNF_READONLY) && !(bp->b_flags & B_READ)) {
		bp->b_error = EACCES;
		bp->b_flags |= B_ERROR;
		goto done;
	}

	/*
	 * Do bounds checking and adjust transfer.  If there's an error,
	 * the bounds check will flag that for us.
	 */
	if (DISKPART(bp->b_dev) != RAW_PART) {
		if (bounds_check_with_label(&vnd->sc_dkdev,
		    bp, vnd->sc_flags & (VNF_WLABEL|VNF_LABELLING)) <= 0)
			goto done;
	}

	/* If it's a nil transfer, wake up the top half now. */
	if (bp->b_bcount == 0)
		goto done;

	/*
	 * Put the block number in terms of the logical blocksize
	 * of the "device".
	 */

	blkno = bp->b_blkno / (lp->d_secsize / DEV_BSIZE);

	/*
	 * Translate the partition-relative block number to an absolute.
	 */
	if (DISKPART(bp->b_dev) != RAW_PART) {
		struct partition *pp;

		pp = &vnd->sc_dkdev.dk_label->d_partitions[
		    DISKPART(bp->b_dev)];
		blkno += pp->p_offset;
	}
	bp->b_rawblkno = blkno;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndstrategy(%p): unit %d\n", bp, unit);
#endif
	BUFQ_PUT(vnd->sc_tab, bp);
	wakeup(&vnd->sc_tab);
	splx(s);
	return;
 done:
	biodone(bp);
	splx(s);
}

static void
vndthread(void *arg)
{
	struct vnd_softc *vnd = arg;
	struct mount *mp;
	int s, bsize;
	int sz, error;
	struct disklabel *lp;

	s = splbio();
	vnd->sc_flags |= VNF_KTHREAD;
	wakeup(&vnd->sc_kthread);

	/*
	 * Dequeue requests, break them into bsize pieces and submit using
	 * VOP_BMAP/VOP_STRATEGY.
	 */
	while ((vnd->sc_flags & VNF_VUNCONF) == 0) {
		struct vndxfer *vnx;
		off_t offset;
		int resid;
		int skipped = 0;
		off_t bn;
		int flags;
		struct buf *obp;
		struct buf *bp;

		obp = BUFQ_GET(vnd->sc_tab);
		if (obp == NULL) {
			tsleep(&vnd->sc_tab, PRIBIO, "vndbp", 0);
			continue;
		}
		splx(s);
		flags = obp->b_flags;
#ifdef DEBUG
		if (vnddebug & VDB_FOLLOW)
			printf("vndthread(%p)\n", obp);
#endif
		lp = vnd->sc_dkdev.dk_label;

		/* convert to a byte offset within the file. */
		bn = obp->b_rawblkno * lp->d_secsize;

		if (vnd->sc_vp->v_mount == NULL) {
			obp->b_error = ENXIO;
			obp->b_flags |= B_ERROR;
			goto done;
		}
#ifdef VND_COMPRESSION
		/* handle a compressed read */
		if ((flags & B_READ) != 0 && (vnd->sc_flags & VNF_COMP)) {
			compstrategy(obp, bn);
			goto done;
		}
#endif /* VND_COMPRESSION */

		bsize = vnd->sc_vp->v_mount->mnt_stat.f_iosize;

		/*
		 * Allocate a header for this transfer and link it to the
		 * buffer
		 */
		s = splbio();
		vnx = VND_GETXFER(vnd);
		splx(s);
		vnx->vx_vnd = vnd;

		bp = &vnx->vx_buf;
		BUF_INIT(bp);
		bp->b_flags = (obp->b_flags & B_READ) | B_CALL;
		bp->b_iodone = vndiodone;
		bp->b_private = obp;
		bp->b_vp = NULL;
		bp->b_data = obp->b_data;
		bp->b_bcount = bp->b_resid = obp->b_bcount;
		BIO_COPYPRIO(bp, obp);

		s = splbio();
		while (vnd->sc_active >= vnd->sc_maxactive) {
			tsleep(&vnd->sc_tab, PRIBIO, "vndac", 0);
		}
		vnd->sc_active++;
		splx(s);

		if ((flags & B_READ) == 0)
			vn_start_write(vnd->sc_vp, &mp, V_WAIT);

		/* Instrumentation. */
		disk_busy(&vnd->sc_dkdev);

		/*
		 * Feed requests sequentially.
		 * We do it this way to keep from flooding NFS servers if we
		 * are connected to an NFS file.  This places the burden on
		 * the client rather than the server.
		 */
		error = 0;
		for (offset = 0, resid = bp->b_resid; resid;
		    resid -= sz, offset += sz) {
			struct buf *nbp;
			struct vnode *vp;
			daddr_t nbn;
			int off, nra;

			nra = 0;
			vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY | LK_CANRECURSE);
			error = VOP_BMAP(vnd->sc_vp, bn / bsize, &vp, &nbn, &nra);
			VOP_UNLOCK(vnd->sc_vp, 0);

			if (error == 0 && (long)nbn == -1)
				error = EIO;

			/*
			 * If there was an error or a hole in the file...punt.
			 * Note that we may have to wait for any operations
			 * that we have already fired off before releasing
			 * the buffer.
			 *
			 * XXX we could deal with holes here but it would be
			 * a hassle (in the write case).
			 */
			if (error) {
				skipped += resid;
				break;
			}

#ifdef DEBUG
			if (!dovndcluster)
				nra = 0;
#endif

			if ((off = bn % bsize) != 0)
				sz = bsize - off;
			else
				sz = (1 + nra) * bsize;
			if (resid < sz)
				sz = resid;
#ifdef DEBUG
			if (vnddebug & VDB_IO)
				printf("vndstrategy: vp %p/%p bn 0x%qx/0x%" PRIx64
				    " sz 0x%x\n",
				    vnd->sc_vp, vp, (long long)bn, nbn, sz);
#endif

			nbp = getiobuf();
			nestiobuf_setup(bp, nbp, offset, sz);
			nbp->b_blkno = nbn + btodb(off);

#if 0 /* XXX #ifdef DEBUG */
			if (vnddebug & VDB_IO)
				printf("vndstart(%ld): bp %p vp %p blkno "
				    "0x%" PRIx64 " flags %x addr %p cnt 0x%x\n",
				    (long) (vnd-vnd_softc), &nbp->vb_buf,
				    nbp->vb_buf.b_vp, nbp->vb_buf.b_blkno,
				    nbp->vb_buf.b_flags, nbp->vb_buf.b_data,
				    nbp->vb_buf.b_bcount);
#endif
			VOP_STRATEGY(vp, nbp);
			bn += sz;
		}
		nestiobuf_done(bp, skipped, error);

		if ((flags & B_READ) == 0)
			vn_finished_write(mp, 0);

		s = splbio();
		continue;
 done:
		biodone(obp);
		s = splbio();
	}

	vnd->sc_flags &= (~VNF_KTHREAD | VNF_VUNCONF);
	wakeup(&vnd->sc_kthread);
	splx(s);
	kthread_exit(0);
}

static void
vndiodone(struct buf *bp)
{
	struct vndxfer *vnx = VND_BUFTOXFER(bp);
	struct vnd_softc *vnd = vnx->vx_vnd;
	struct buf *obp = bp->b_private;

	KASSERT(&vnx->vx_buf == bp);
	KASSERT(vnd->sc_active > 0);
#ifdef DEBUG
	if (vnddebug & VDB_IO) {
		printf("vndiodone1: bp %p iodone: error %d\n",
		    bp, (bp->b_flags & B_ERROR) != 0 ?
		    bp->b_error : 0);
	}
#endif
	disk_unbusy(&vnd->sc_dkdev, bp->b_bcount - bp->b_resid,
	    (bp->b_flags & B_READ));
	vnd->sc_active--;
	if (vnd->sc_active == 0) {
		wakeup(&vnd->sc_tab);
	}
	obp->b_flags |= bp->b_flags & B_ERROR;
	obp->b_error = bp->b_error;
	obp->b_resid = bp->b_resid;
	VND_PUTXFER(vnd, vnx);
	biodone(obp);
}

/* ARGSUSED */
static int
vndread(dev_t dev, struct uio *uio, int flags)
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndread(0x%x, %p)\n", dev, uio);
#endif

	if (unit >= numvnd)
		return (ENXIO);
	sc = &vnd_softc[unit];

	if ((sc->sc_flags & VNF_INITED) == 0)
		return (ENXIO);

	return (physio(vndstrategy, NULL, dev, B_READ, minphys, uio));
}

/* ARGSUSED */
static int
vndwrite(dev_t dev, struct uio *uio, int flags)
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndwrite(0x%x, %p)\n", dev, uio);
#endif

	if (unit >= numvnd)
		return (ENXIO);
	sc = &vnd_softc[unit];

	if ((sc->sc_flags & VNF_INITED) == 0)
		return (ENXIO);

	return (physio(vndstrategy, NULL, dev, B_WRITE, minphys, uio));
}

static int
vnd_cget(struct lwp *l, int unit, int *un, struct vattr *va)
{
	struct vnd_softc *vnd;

	if (*un == -1)
		*un = unit;
	if (*un >= numvnd)
		return ENXIO;
	if (*un < 0)
		return EINVAL;

	vnd = &vnd_softc[*un];

	if ((vnd->sc_flags & VNF_INITED) == 0)
		return -1;

	return VOP_GETATTR(vnd->sc_vp, va, l->l_proc->p_ucred, l);
}

/* ARGSUSED */
static int
vndioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct lwp *l)
{
	int unit = vndunit(dev);
	struct vnd_softc *vnd;
	struct vnd_ioctl *vio;
	struct vattr vattr;
	struct nameidata nd;
	int error, part, pmask;
	size_t geomsize;
	struct proc *p = (l != NULL) ? l->l_proc : NULL;
	int fflags;
#ifdef __HAVE_OLD_DISKLABEL
	struct disklabel newlabel;
#endif

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndioctl(0x%x, 0x%lx, %p, 0x%x, %p): unit %d\n",
		    dev, cmd, data, flag, p, unit);
#endif
	if (unit >= numvnd)
		return (ENXIO);

	vnd = &vnd_softc[unit];
	vio = (struct vnd_ioctl *)data;

	/* Must be open for writes for these commands... */
	switch (cmd) {
	case VNDIOCSET:
	case VNDIOCCLR:
	case DIOCSDINFO:
	case DIOCWDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCSDINFO:
	case ODIOCWDINFO:
#endif
	case DIOCKLABEL:
	case DIOCWLABEL:
		if ((flag & FWRITE) == 0)
			return (EBADF);
	}

	/* Must be initialized for these... */
	switch (cmd) {
	case VNDIOCCLR:
	case DIOCGDINFO:
	case DIOCSDINFO:
	case DIOCWDINFO:
	case DIOCGPART:
	case DIOCKLABEL:
	case DIOCWLABEL:
	case DIOCGDEFLABEL:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDINFO:
	case ODIOCSDINFO:
	case ODIOCWDINFO:
	case ODIOCGDEFLABEL:
#endif
		if ((vnd->sc_flags & VNF_INITED) == 0)
			return (ENXIO);
	}

	switch (cmd) {
	case VNDIOCSET:
		if (vnd->sc_flags & VNF_INITED)
			return (EBUSY);

		if ((error = vndlock(vnd)) != 0)
			return (error);

		fflags = FREAD;
		if ((vio->vnd_flags & VNDIOF_READONLY) == 0)
			fflags |= FWRITE;
		NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, vio->vnd_file, l);
		if ((error = vn_open(&nd, fflags, 0)) != 0)
			goto unlock_and_exit;
		error = VOP_GETATTR(nd.ni_vp, &vattr, l->l_proc->p_ucred, l);
		if (!error && nd.ni_vp->v_type != VREG)
			error = EOPNOTSUPP;
		if (error) {
			VOP_UNLOCK(nd.ni_vp, 0);
			goto close_and_exit;
		}

		/* If using a compressed file, initialize its info */
		/* (or abort with an error if kernel has no compression) */
		if (vio->vnd_flags & VNF_COMP) {
#ifdef VND_COMPRESSION
			struct vnd_comp_header *ch;
			int i;
			u_int32_t comp_size;
			u_int32_t comp_maxsize;

			/* allocate space for compressed file header */
			ch = malloc(sizeof(struct vnd_comp_header),
			    M_TEMP, M_WAITOK);

			/* read compressed file header */
			error = vn_rdwr(UIO_READ, nd.ni_vp, (caddr_t)ch,
			    sizeof(struct vnd_comp_header), 0, UIO_SYSSPACE,
			    IO_UNIT|IO_NODELOCKED, p->p_ucred, NULL, NULL);
			if(error) {
				free(ch, M_TEMP);
				VOP_UNLOCK(nd.ni_vp, 0);
				goto close_and_exit;
			}

			/* save some header info */
			vnd->sc_comp_blksz = ntohl(ch->block_size);
			/* note last offset is the file byte size */
			vnd->sc_comp_numoffs = ntohl(ch->num_blocks)+1;
			free(ch, M_TEMP);
			if(vnd->sc_comp_blksz % DEV_BSIZE !=0) {
				VOP_UNLOCK(nd.ni_vp, 0);
				error = EINVAL;
				goto close_and_exit;
			}
			if(sizeof(struct vnd_comp_header) +
			  sizeof(u_int64_t) * vnd->sc_comp_numoffs >
			  vattr.va_size) {
				VOP_UNLOCK(nd.ni_vp, 0);
				error = EINVAL;
				goto close_and_exit;
			}

			/* set decompressed file size */
			vattr.va_size =
			    (vnd->sc_comp_numoffs - 1) * vnd->sc_comp_blksz;

			/* allocate space for all the compressed offsets */
			vnd->sc_comp_offsets =
			    malloc(sizeof(u_int64_t) * vnd->sc_comp_numoffs,
			    M_DEVBUF, M_WAITOK);

			/* read in the offsets */
			error = vn_rdwr(UIO_READ, nd.ni_vp,
			    (caddr_t)vnd->sc_comp_offsets,
			    sizeof(u_int64_t) * vnd->sc_comp_numoffs,
			    sizeof(struct vnd_comp_header), UIO_SYSSPACE,
			    IO_UNIT|IO_NODELOCKED, p->p_ucred, NULL, NULL);
			if(error) {
				VOP_UNLOCK(nd.ni_vp, 0);
				goto close_and_exit;
			}
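
			/*
			 * Image layout assumed by the reads above (a
			 * sketch inferred from this code, not a formal
			 * spec): a vnd_comp_header whose block_size and
			 * num_blocks fields are stored in network byte
			 * order, followed by sc_comp_numoffs 64-bit
			 * big-endian file offsets (one per block plus a
			 * final offset equal to the file size), followed
			 * by the zlib-compressed blocks themselves.
			 * compstrategy() below uses offsets[n+1] -
			 * offsets[n] as the compressed length of block n.
			 */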
			/*
			 * find largest block size (used for allocation limit).
			 * Also convert offset to native byte order.
			 */
			comp_maxsize = 0;
			for (i = 0; i < vnd->sc_comp_numoffs - 1; i++) {
				vnd->sc_comp_offsets[i] =
				    be64toh(vnd->sc_comp_offsets[i]);
				comp_size = be64toh(vnd->sc_comp_offsets[i + 1])
				    - vnd->sc_comp_offsets[i];
				if (comp_size > comp_maxsize)
					comp_maxsize = comp_size;
			}
			vnd->sc_comp_offsets[vnd->sc_comp_numoffs - 1] =
			    be64toh(vnd->sc_comp_offsets[vnd->sc_comp_numoffs - 1]);

			/* create compressed data buffer */
			vnd->sc_comp_buff = malloc(comp_maxsize,
			    M_DEVBUF, M_WAITOK);

			/* create decompressed buffer */
			vnd->sc_comp_decombuf = malloc(vnd->sc_comp_blksz,
			    M_DEVBUF, M_WAITOK);
			vnd->sc_comp_buffblk = -1;

			/* Initialize decompress stream */
			bzero(&vnd->sc_comp_stream, sizeof(z_stream));
			vnd->sc_comp_stream.zalloc = vnd_alloc;
			vnd->sc_comp_stream.zfree = vnd_free;
			error = inflateInit2(&vnd->sc_comp_stream, MAX_WBITS);
			if(error) {
				if(vnd->sc_comp_stream.msg)
					printf("vnd%d: compressed file, %s\n",
					    unit, vnd->sc_comp_stream.msg);
				VOP_UNLOCK(nd.ni_vp, 0);
				error = EINVAL;
				goto close_and_exit;
			}

			vnd->sc_flags |= VNF_COMP | VNF_READONLY;
#else /* !VND_COMPRESSION */
			VOP_UNLOCK(nd.ni_vp, 0);
			error = EOPNOTSUPP;
			goto close_and_exit;
#endif /* VND_COMPRESSION */
		}

		VOP_UNLOCK(nd.ni_vp, 0);
		vnd->sc_vp = nd.ni_vp;
		vnd->sc_size = btodb(vattr.va_size);	/* note truncation */

		/*
		 * Use the pseudo-geometry specified.  If none was provided,
		 * use "standard" Adaptec fictitious geometry.
		 */
		if (vio->vnd_flags & VNDIOF_HASGEOM) {

			memcpy(&vnd->sc_geom, &vio->vnd_geom,
			    sizeof(vio->vnd_geom));

			/*
			 * Sanity-check the sector size.
			 * XXX Don't allow secsize < DEV_BSIZE.  Should
			 * XXX we?
			 */
			if (vnd->sc_geom.vng_secsize < DEV_BSIZE ||
			    (vnd->sc_geom.vng_secsize % DEV_BSIZE) != 0 ||
			    vnd->sc_geom.vng_ncylinders == 0 ||
			    (vnd->sc_geom.vng_ntracks *
			     vnd->sc_geom.vng_nsectors) == 0) {
				error = EINVAL;
				goto close_and_exit;
			}

			/*
			 * Compute the size (in DEV_BSIZE blocks) specified
			 * by the geometry.
			 */
			geomsize = (vnd->sc_geom.vng_nsectors *
			    vnd->sc_geom.vng_ntracks *
			    vnd->sc_geom.vng_ncylinders) *
			    (vnd->sc_geom.vng_secsize / DEV_BSIZE);

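			/*
			 * Worked example (illustrative): a geometry of
			 * 512-byte sectors, 32 sectors/track, 64 tracks
			 * and 100 cylinders gives geomsize =
			 * 32 * 64 * 100 * (512 / DEV_BSIZE) = 204800
			 * DEV_BSIZE blocks (100 MB with DEV_BSIZE 512),
			 * so the backing file must be at least that big.
			 */
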
			/*
			 * Sanity-check the size against the specified
			 * geometry.
			 */
			if (vnd->sc_size < geomsize) {
				error = EINVAL;
				goto close_and_exit;
			}
		} else if (vnd->sc_size >= (32 * 64)) {
			/*
			 * Size must be at least 2048 DEV_BSIZE blocks
			 * (1M) in order to use this geometry.
			 */
			vnd->sc_geom.vng_secsize = DEV_BSIZE;
			vnd->sc_geom.vng_nsectors = 32;
			vnd->sc_geom.vng_ntracks = 64;
			vnd->sc_geom.vng_ncylinders = vnd->sc_size / (64 * 32);
		} else {
			vnd->sc_geom.vng_secsize = DEV_BSIZE;
			vnd->sc_geom.vng_nsectors = 1;
			vnd->sc_geom.vng_ntracks = 1;
			vnd->sc_geom.vng_ncylinders = vnd->sc_size;
		}

		if (vio->vnd_flags & VNDIOF_READONLY) {
			vnd->sc_flags |= VNF_READONLY;
		}

		if ((error = vndsetcred(vnd, p->p_ucred)) != 0)
			goto close_and_exit;

		memset(vnd->sc_xname, 0, sizeof(vnd->sc_xname));	/* XXX */
		snprintf(vnd->sc_xname, sizeof(vnd->sc_xname), "vnd%d", unit);


		vndthrottle(vnd, vnd->sc_vp);
		vio->vnd_size = dbtob(vnd->sc_size);
		vnd->sc_flags |= VNF_INITED;

		/* create the kernel thread, wait for it to be up */
		error = kthread_create1(vndthread, vnd, &vnd->sc_kthread,
		    vnd->sc_xname);
		if (error)
			goto close_and_exit;
		while ((vnd->sc_flags & VNF_KTHREAD) == 0) {
			tsleep(&vnd->sc_kthread, PRIBIO, "vndthr", 0);
		}
#ifdef DEBUG
		if (vnddebug & VDB_INIT)
			printf("vndioctl: SET vp %p size 0x%lx %d/%d/%d/%d\n",
			    vnd->sc_vp, (unsigned long) vnd->sc_size,
			    vnd->sc_geom.vng_secsize,
			    vnd->sc_geom.vng_nsectors,
			    vnd->sc_geom.vng_ntracks,
			    vnd->sc_geom.vng_ncylinders);
#endif

		/* Attach the disk. */
		vnd->sc_dkdev.dk_name = vnd->sc_xname;
		pseudo_disk_attach(&vnd->sc_dkdev);

		/* Initialize the xfer and buffer pools. */
		pool_init(&vnd->sc_vxpool, sizeof(struct vndxfer), 0,
		    0, 0, "vndxpl", NULL);

		/* Try and read the disklabel. */
		vndgetdisklabel(dev);

		vndunlock(vnd);

		break;

 close_and_exit:
		(void) vn_close(nd.ni_vp, fflags, p->p_ucred, l);
 unlock_and_exit:
#ifdef VND_COMPRESSION
		/* free any allocated memory (for compressed file) */
		if(vnd->sc_comp_offsets) {
			free(vnd->sc_comp_offsets, M_DEVBUF);
			vnd->sc_comp_offsets = NULL;
		}
		if(vnd->sc_comp_buff) {
			free(vnd->sc_comp_buff, M_DEVBUF);
			vnd->sc_comp_buff = NULL;
		}
		if(vnd->sc_comp_decombuf) {
			free(vnd->sc_comp_decombuf, M_DEVBUF);
			vnd->sc_comp_decombuf = NULL;
		}
#endif /* VND_COMPRESSION */
		vndunlock(vnd);
		return (error);

	case VNDIOCCLR:
		if ((error = vndlock(vnd)) != 0)
			return (error);

		/*
		 * Don't unconfigure if any other partitions are open
		 * or if both the character and block flavors of this
		 * partition are open.
		 */
		part = DISKPART(dev);
		pmask = (1 << part);
		if (((vnd->sc_dkdev.dk_openmask & ~pmask) ||
		    ((vnd->sc_dkdev.dk_bopenmask & pmask) &&
		    (vnd->sc_dkdev.dk_copenmask & pmask))) &&
		    !(vio->vnd_flags & VNDIOF_FORCE)) {
			vndunlock(vnd);
			return (EBUSY);
		}

		/*
		 * XXX vndclear() might call vndclose() implicitly;
		 * release lock to avoid recursion
		 */
		vndunlock(vnd);
		vndclear(vnd, minor(dev));
#ifdef DEBUG
		if (vnddebug & VDB_INIT)
			printf("vndioctl: CLRed\n");
#endif

		/* Destroy the xfer and buffer pools. */
		pool_destroy(&vnd->sc_vxpool);

		/* Detach the disk. */
		pseudo_disk_detach(&vnd->sc_dkdev);

		break;

#ifdef COMPAT_30
	case VNDIOOCGET: {
		struct vnd_ouser *vnu;
		struct vattr va;
		vnu = (struct vnd_ouser *)data;
		switch (error = vnd_cget(l, unit, &vnu->vnu_unit, &va)) {
		case 0:
			vnu->vnu_dev = va.va_fsid;
			vnu->vnu_ino = va.va_fileid;
			break;
		case -1:
			/* unused is not an error */
			vnu->vnu_dev = 0;
			vnu->vnu_ino = 0;
			break;
		default:
			return error;
		}
		break;
	}
#endif
	case VNDIOCGET: {
		struct vnd_user *vnu;
		struct vattr va;
		vnu = (struct vnd_user *)data;
		switch (error = vnd_cget(l, unit, &vnu->vnu_unit, &va)) {
		case 0:
			vnu->vnu_dev = va.va_fsid;
			vnu->vnu_ino = va.va_fileid;
			break;
		case -1:
			/* unused is not an error */
			vnu->vnu_dev = 0;
			vnu->vnu_ino = 0;
			break;
		default:
			return error;
		}
		break;
	}

	case DIOCGDINFO:
		*(struct disklabel *)data = *(vnd->sc_dkdev.dk_label);
		break;

#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDINFO:
		newlabel = *(vnd->sc_dkdev.dk_label);
		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
			return ENOTTY;
		memcpy(data, &newlabel, sizeof (struct olddisklabel));
		break;
#endif

	case DIOCGPART:
		((struct partinfo *)data)->disklab = vnd->sc_dkdev.dk_label;
		((struct partinfo *)data)->part =
		    &vnd->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
		break;

	case DIOCWDINFO:
	case DIOCSDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCWDINFO:
	case ODIOCSDINFO:
#endif
	{
		struct disklabel *lp;

		if ((error = vndlock(vnd)) != 0)
			return (error);

		vnd->sc_flags |= VNF_LABELLING;

#ifdef __HAVE_OLD_DISKLABEL
		if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
			memset(&newlabel, 0, sizeof newlabel);
			memcpy(&newlabel, data, sizeof (struct olddisklabel));
			lp = &newlabel;
		} else
#endif
		lp = (struct disklabel *)data;

		error = setdisklabel(vnd->sc_dkdev.dk_label,
		    lp, 0, vnd->sc_dkdev.dk_cpulabel);
		if (error == 0) {
			if (cmd == DIOCWDINFO
#ifdef __HAVE_OLD_DISKLABEL
			    || cmd == ODIOCWDINFO
#endif
			   )
				error = writedisklabel(VNDLABELDEV(dev),
				    vndstrategy, vnd->sc_dkdev.dk_label,
				    vnd->sc_dkdev.dk_cpulabel);
		}

		vnd->sc_flags &= ~VNF_LABELLING;

		vndunlock(vnd);

		if (error)
			return (error);
		break;
	}

	case DIOCKLABEL:
		if (*(int *)data != 0)
			vnd->sc_flags |= VNF_KLABEL;
		else
			vnd->sc_flags &= ~VNF_KLABEL;
		break;

	case DIOCWLABEL:
		if (*(int *)data != 0)
			vnd->sc_flags |= VNF_WLABEL;
		else
			vnd->sc_flags &= ~VNF_WLABEL;
		break;

	case DIOCGDEFLABEL:
		vndgetdefaultlabel(vnd, (struct disklabel *)data);
		break;

#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDEFLABEL:
		vndgetdefaultlabel(vnd, &newlabel);
		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
			return ENOTTY;
		memcpy(data, &newlabel, sizeof (struct olddisklabel));
		break;
#endif

	default:
		return (ENOTTY);
	}

	return (0);
}

/*
 * Duplicate the current process's credentials.  Since we are called only
 * as the result of a SET ioctl and only root can do that, any future access
 * to this "disk" is essentially as root.  Note that credentials may change
 * if some other uid can write directly to the mapped file (NFS).
 */
static int
vndsetcred(struct vnd_softc *vnd, struct ucred *cred)
{
	struct uio auio;
	struct iovec aiov;
	char *tmpbuf;
	int error;

	vnd->sc_cred = crdup(cred);
	tmpbuf = malloc(DEV_BSIZE, M_TEMP, M_WAITOK);

	/* XXX: Horrible kludge to establish credentials for NFS */
	aiov.iov_base = tmpbuf;
	aiov.iov_len = min(DEV_BSIZE, dbtob(vnd->sc_size));
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = 0;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_resid = aiov.iov_len;
	vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY);
	error = VOP_READ(vnd->sc_vp, &auio, 0, vnd->sc_cred);
	if (error == 0) {
		/*
		 * Because vnd does all IO directly through the vnode
		 * we need to flush (at least) the buffer from the above
		 * VOP_READ from the buffer cache to prevent cache
		 * incoherencies.  Also, be careful to write dirty
		 * buffers back to stable storage.
		 */
		error = vinvalbuf(vnd->sc_vp, V_SAVE, vnd->sc_cred,
		    curlwp, 0, 0);
	}
	VOP_UNLOCK(vnd->sc_vp, 0);

	free(tmpbuf, M_TEMP);
	return (error);
}

/*
 * Set maxactive based on FS type
 */
static void
vndthrottle(struct vnd_softc *vnd, struct vnode *vp)
{
#ifdef NFS
	extern int (**nfsv2_vnodeop_p)(void *);

	if (vp->v_op == nfsv2_vnodeop_p)
		vnd->sc_maxactive = 2;
	else
#endif
		vnd->sc_maxactive = 8;

	if (vnd->sc_maxactive < 1)
		vnd->sc_maxactive = 1;
}

#if 0
static void
vndshutdown(void)
{
	struct vnd_softc *vnd;

	for (vnd = &vnd_softc[0]; vnd < &vnd_softc[numvnd]; vnd++)
		if (vnd->sc_flags & VNF_INITED)
			vndclear(vnd);
}
#endif

static void
vndclear(struct vnd_softc *vnd, int myminor)
{
	struct vnode *vp = vnd->sc_vp;
	struct lwp *l = curlwp;
	int fflags = FREAD;
	int bmaj, cmaj, i, mn;
	int s;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndclear(%p): vp %p\n", vnd, vp);
#endif
	/* locate the major number */
	bmaj = bdevsw_lookup_major(&vnd_bdevsw);
	cmaj = cdevsw_lookup_major(&vnd_cdevsw);

	/* Nuke the vnodes for any open instances */
	for (i = 0; i < MAXPARTITIONS; i++) {
		mn = DISKMINOR(vnd->sc_unit, i);
		vdevgone(bmaj, mn, mn, VBLK);
		if (mn != myminor) /* XXX avoid killing our own vnode */
			vdevgone(cmaj, mn, mn, VCHR);
	}

	if ((vnd->sc_flags & VNF_READONLY) == 0)
		fflags |= FWRITE;

	s = splbio();
	bufq_drain(vnd->sc_tab);
	splx(s);

	vnd->sc_flags |= VNF_VUNCONF;
	wakeup(&vnd->sc_tab);
	while (vnd->sc_flags & VNF_KTHREAD)
		tsleep(&vnd->sc_kthread, PRIBIO, "vnthr", 0);

#ifdef VND_COMPRESSION
	/* free the compressed file buffers */
	if(vnd->sc_flags & VNF_COMP) {
		if(vnd->sc_comp_offsets) {
			free(vnd->sc_comp_offsets, M_DEVBUF);
			vnd->sc_comp_offsets = NULL;
		}
		if(vnd->sc_comp_buff) {
			free(vnd->sc_comp_buff, M_DEVBUF);
			vnd->sc_comp_buff = NULL;
		}
		if(vnd->sc_comp_decombuf) {
			free(vnd->sc_comp_decombuf, M_DEVBUF);
			vnd->sc_comp_decombuf = NULL;
		}
	}
#endif /* VND_COMPRESSION */
	vnd->sc_flags &=
	    ~(VNF_INITED | VNF_READONLY | VNF_VLABEL
	      | VNF_VUNCONF | VNF_COMP);
	if (vp == (struct vnode *)0)
		panic("vndclear: null vp");
	(void) vn_close(vp, fflags, vnd->sc_cred, l);
	crfree(vnd->sc_cred);
	vnd->sc_vp = (struct vnode *)0;
	vnd->sc_cred = (struct ucred *)0;
	vnd->sc_size = 0;
}

static int
vndsize(dev_t dev)
{
	struct vnd_softc *sc;
	struct disklabel *lp;
	int part, unit, omask;
	int size;

	unit = vndunit(dev);
	if (unit >= numvnd)
		return (-1);
	sc = &vnd_softc[unit];

	if ((sc->sc_flags & VNF_INITED) == 0)
		return (-1);

	part = DISKPART(dev);
	omask = sc->sc_dkdev.dk_openmask & (1 << part);
	lp = sc->sc_dkdev.dk_label;

	if (omask == 0 && vndopen(dev, 0, S_IFBLK, curlwp))	/* XXX */
		return (-1);

	if (lp->d_partitions[part].p_fstype != FS_SWAP)
		size = -1;
	else
		size = lp->d_partitions[part].p_size *
		    (lp->d_secsize / DEV_BSIZE);

	if (omask == 0 && vndclose(dev, 0, S_IFBLK, curlwp))	/* XXX */
		return (-1);

	return (size);
}

static int
vnddump(dev_t dev, daddr_t blkno, caddr_t va, size_t size)
{

	/* Not implemented. */
	return ENXIO;
}

static void
vndgetdefaultlabel(struct vnd_softc *sc, struct disklabel *lp)
{
	struct vndgeom *vng = &sc->sc_geom;
	struct partition *pp;

	memset(lp, 0, sizeof(*lp));

	lp->d_secperunit = sc->sc_size / (vng->vng_secsize / DEV_BSIZE);
	lp->d_secsize = vng->vng_secsize;
	lp->d_nsectors = vng->vng_nsectors;
	lp->d_ntracks = vng->vng_ntracks;
	lp->d_ncylinders = vng->vng_ncylinders;
	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;

	strncpy(lp->d_typename, "vnd", sizeof(lp->d_typename));
	lp->d_type = DTYPE_VND;
	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
	lp->d_rpm = 3600;
	lp->d_interleave = 1;
	lp->d_flags = 0;

	pp = &lp->d_partitions[RAW_PART];
	pp->p_offset = 0;
	pp->p_size = lp->d_secperunit;
	pp->p_fstype = FS_UNUSED;
	lp->d_npartitions = RAW_PART + 1;

	lp->d_magic = DISKMAGIC;
	lp->d_magic2 = DISKMAGIC;
	lp->d_checksum = dkcksum(lp);
}

/*
 * Read the disklabel from a vnd.  If one is not present, create a fake one.
 */
static void
vndgetdisklabel(dev_t dev)
{
	struct vnd_softc *sc = &vnd_softc[vndunit(dev)];
	const char *errstring;
	struct disklabel *lp = sc->sc_dkdev.dk_label;
	struct cpu_disklabel *clp = sc->sc_dkdev.dk_cpulabel;
	int i;

	memset(clp, 0, sizeof(*clp));

	vndgetdefaultlabel(sc, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(VNDLABELDEV(dev), vndstrategy, lp, clp);
	if (errstring) {
		/*
		 * Lack of disklabel is common, but we print the warning
		 * anyway, since it might contain other useful information.
		 */
		printf("%s: %s\n", sc->sc_xname, errstring);

		/*
		 * For historical reasons, if there's no disklabel
		 * present, all partitions must be FS_BSDFFS and
		 * occupy the entire disk.
		 */
		for (i = 0; i < MAXPARTITIONS; i++) {
			/*
			 * Don't wipe out port specific hack (such as
			 * dos partition hack of i386 port).
			 */
			if (lp->d_partitions[i].p_size != 0)
				continue;

			lp->d_partitions[i].p_size = lp->d_secperunit;
			lp->d_partitions[i].p_offset = 0;
			lp->d_partitions[i].p_fstype = FS_BSDFFS;
		}

		strncpy(lp->d_packname, "default label",
		    sizeof(lp->d_packname));

		lp->d_npartitions = MAXPARTITIONS;
		lp->d_checksum = dkcksum(lp);
	}

	/* In-core label now valid. */
	sc->sc_flags |= VNF_VLABEL;
}

/*
 * Wait interruptibly for an exclusive lock.
 *
 * XXX
 * Several drivers do this; it should be abstracted and made MP-safe.
 */
static int
vndlock(struct vnd_softc *sc)
{
	int error;

	while ((sc->sc_flags & VNF_LOCKED) != 0) {
		sc->sc_flags |= VNF_WANTED;
		if ((error = tsleep(sc, PRIBIO | PCATCH, "vndlck", 0)) != 0)
			return (error);
	}
	sc->sc_flags |= VNF_LOCKED;
	return (0);
}

/*
 * Unlock and wake up any waiters.
 */
static void
vndunlock(struct vnd_softc *sc)
{

	sc->sc_flags &= ~VNF_LOCKED;
	if ((sc->sc_flags & VNF_WANTED) != 0) {
		sc->sc_flags &= ~VNF_WANTED;
		wakeup(sc);
	}
}

#ifdef VND_COMPRESSION
/* compressed file read */
static void
compstrategy(struct buf *bp, off_t bn)
{
	int error;
	int unit = vndunit(bp->b_dev);
	struct vnd_softc *vnd = &vnd_softc[unit];
	u_int32_t comp_block;
	struct uio auio;
	caddr_t addr;
	int s;

	/* set up constants for data move */
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = bp->b_flags & B_PHYS ? UIO_USERSPACE : UIO_SYSSPACE;
	auio.uio_lwp = LIST_FIRST(&bp->b_proc->p_lwps);

	/* read, and transfer the data */
	addr = bp->b_data;
	s = splbio();
	while (bp->b_resid > 0) {
		unsigned length;
		size_t length_in_buffer;
		u_int32_t offset_in_buffer;
		struct iovec aiov;

		/* calculate the compressed block number */
		comp_block = bn / (off_t)vnd->sc_comp_blksz;

		/* check for good block number */
		if (comp_block >= vnd->sc_comp_numoffs) {
			bp->b_error = EINVAL;
			bp->b_flags |= B_ERROR;
			splx(s);
			return;
		}

		/* read in the compressed block, if not in buffer */
		if (comp_block != vnd->sc_comp_buffblk) {
			length = vnd->sc_comp_offsets[comp_block + 1] -
			    vnd->sc_comp_offsets[comp_block];
			vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY);
			error = vn_rdwr(UIO_READ, vnd->sc_vp, vnd->sc_comp_buff,
			    length, vnd->sc_comp_offsets[comp_block],
			    UIO_SYSSPACE, IO_UNIT, vnd->sc_cred, NULL, NULL);
			if (error) {
				bp->b_error = error;
				bp->b_flags |= B_ERROR;
				VOP_UNLOCK(vnd->sc_vp, 0);
				splx(s);
				return;
			}
			/* uncompress the buffer */
			vnd->sc_comp_stream.next_in = vnd->sc_comp_buff;
			vnd->sc_comp_stream.avail_in = length;
			vnd->sc_comp_stream.next_out = vnd->sc_comp_decombuf;
			vnd->sc_comp_stream.avail_out = vnd->sc_comp_blksz;
			inflateReset(&vnd->sc_comp_stream);
			error = inflate(&vnd->sc_comp_stream, Z_FINISH);
			if (error != Z_STREAM_END) {
				if (vnd->sc_comp_stream.msg)
					printf("%s: compressed file, %s\n",
					    vnd->sc_xname,
					    vnd->sc_comp_stream.msg);
				bp->b_error = EBADMSG;
				bp->b_flags |= B_ERROR;
				VOP_UNLOCK(vnd->sc_vp, 0);
				splx(s);
				return;
			}
			vnd->sc_comp_buffblk = comp_block;
			VOP_UNLOCK(vnd->sc_vp, 0);
		}

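		/*
		 * Worked example (illustrative only): with a 65536-byte
		 * compression block size, a request starting at byte
		 * offset bn = 100000 falls in comp_block 1, starts at
		 * offset_in_buffer 34464 within the decompressed block,
		 * and can copy at most 31072 bytes before advancing to
		 * the next block on the following loop iteration.
		 */
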
		/* transfer the usable uncompressed data */
		offset_in_buffer = bn % (off_t)vnd->sc_comp_blksz;
		length_in_buffer = vnd->sc_comp_blksz - offset_in_buffer;
		if (length_in_buffer > bp->b_resid)
			length_in_buffer = bp->b_resid;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		aiov.iov_base = addr;
		aiov.iov_len = length_in_buffer;
		auio.uio_resid = aiov.iov_len;
		auio.uio_offset = 0;
		error = uiomove(vnd->sc_comp_decombuf + offset_in_buffer,
		    length_in_buffer, &auio);
		if (error) {
			bp->b_error = error;
			bp->b_flags |= B_ERROR;
			splx(s);
			return;
		}

		bn += length_in_buffer;
		addr += length_in_buffer;
		bp->b_resid -= length_in_buffer;
	}
	splx(s);
}

/* compression memory allocation routines */
static void *
vnd_alloc(void *aux, u_int items, u_int siz)
{
	return malloc(items * siz, M_TEMP, M_NOWAIT);
}

static void
vnd_free(void *aux, void *ptr)
{
	free(ptr, M_TEMP);
}
#endif /* VND_COMPRESSION */