/*	$NetBSD: vnd.c,v 1.122 2005/08/28 08:56:14 christos Exp $	*/

/*-
 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vn.c 1.13 94/04/02$
 *
 *	@(#)vn.c	8.9 (Berkeley) 5/14/95
 */

/*
 * Copyright (c) 1988 University of Utah.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vn.c 1.13 94/04/02$
 *
 *	@(#)vn.c	8.9 (Berkeley) 5/14/95
 */

/*
 * Vnode disk driver.
 *
 * Block/character interface to a vnode.  Allows one to treat a file
 * as a disk (e.g. build a filesystem in it, mount it, etc.).
 *
 * NOTE 1: This uses the VOP_BMAP/VOP_STRATEGY interface to the vnode
 * instead of a simple VOP_RDWR.  We do this to avoid distorting the
 * local buffer cache.
 *
 * NOTE 2: There is a security issue involved with this driver.
 * Once mounted, all access to the contents of the "mapped" file via
 * the special file is controlled by the permissions on the special
 * file; the protection of the mapped file is ignored (effectively,
 * root credentials are used in all transactions).
 *
 * NOTE 3: Doesn't interact with leases, should it?
 */
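/*
 * Illustrative sketch only (not part of the driver): a vnd unit is normally
 * configured from userland by vnconfig(8), which opens the unit's raw
 * partition and issues the VNDIOCSET/VNDIOCCLR ioctls handled below.  The
 * device node name and the omitted error handling here are assumptions for
 * brevity; see struct vnd_ioctl in <dev/vndvar.h> for the authoritative
 * interface.
 *
 *	#include <sys/ioctl.h>
 *	#include <dev/vndvar.h>
 *	#include <fcntl.h>
 *	#include <string.h>
 *
 *	struct vnd_ioctl vio;
 *	int fd;
 *
 *	memset(&vio, 0, sizeof(vio));
 *	vio.vnd_file = "/tmp/image.fs";    (regular file backing the disk)
 *	vio.vnd_flags = 0;                 (or VNDIOF_READONLY)
 *
 *	fd = open("/dev/rvnd0d", O_RDWR);  (raw partition; name is port-dependent)
 *	ioctl(fd, VNDIOCSET, &vio);        (configure; vnd_size returns the size)
 *	...
 *	ioctl(fd, VNDIOCCLR, &vio);        (unconfigure; VNDIOF_FORCE overrides
 *	                                    the busy checks)
 */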
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vnd.c,v 1.122 2005/08/28 08:56:14 christos Exp $");

#if defined(_KERNEL_OPT)
#include "fs_nfs.h"
#include "opt_vnd.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/proc.h>
#include <sys/kthread.h>
#include <sys/errno.h>
#include <sys/buf.h>
#include <sys/bufq.h>
#include <sys/malloc.h>
#include <sys/ioctl.h>
#include <sys/disklabel.h>
#include <sys/device.h>
#include <sys/disk.h>
#include <sys/stat.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/uio.h>
#include <sys/conf.h>
#include <net/zlib.h>

#include <miscfs/specfs/specdev.h>

#include <dev/vndvar.h>

#if defined(VNDDEBUG) && !defined(DEBUG)
#define	DEBUG
#endif

#ifdef DEBUG
int dovndcluster = 1;
#define	VDB_FOLLOW	0x01
#define	VDB_INIT	0x02
#define	VDB_IO		0x04
#define	VDB_LABEL	0x08
int vnddebug = 0x00;
#endif

#define	vndunit(x)	DISKUNIT(x)

struct vndxfer {
	struct buf	*vx_bp;		/* Pointer to parent buffer */
	int		vx_error;
	int		vx_pending;	/* # of pending aux buffers */
	int		vx_flags;
#define	VX_BUSY		1
};

struct vndbuf {
	struct buf	vb_buf;
	struct vndxfer	*vb_xfer;
};

#define	VND_GETXFER(vnd)	pool_get(&(vnd)->sc_vxpool, PR_WAITOK)
#define	VND_PUTXFER(vnd, vx)	pool_put(&(vnd)->sc_vxpool, (vx))

#define	VND_GETBUF(vnd)		pool_get(&(vnd)->sc_vbpool, PR_WAITOK)
#define	VND_PUTBUF(vnd, vb)	pool_put(&(vnd)->sc_vbpool, (vb))

struct vnd_softc *vnd_softc;
int numvnd = 0;

#define	VNDLABELDEV(dev) \
	(MAKEDISKDEV(major((dev)), vndunit((dev)), RAW_PART))

/* called by main() at boot time (XXX: and the LKM driver) */
void	vndattach(int);
int	vnddetach(void);

static void	vndclear(struct vnd_softc *, int);
static int	vndsetcred(struct vnd_softc *, struct ucred *);
static void	vndthrottle(struct vnd_softc *, struct vnode *);
static void	vndiodone(struct buf *);
#if 0
static void	vndshutdown(void);
#endif

static void	vndgetdefaultlabel(struct vnd_softc *, struct disklabel *);
static void	vndgetdisklabel(dev_t);

static int	vndlock(struct vnd_softc *);
static void	vndunlock(struct vnd_softc *);
#ifdef VND_COMPRESSION
static void	compstrategy(struct buf *, off_t);
static void	*vnd_alloc(void *, u_int, u_int);
static void	vnd_free(void *, void *);
#endif /* VND_COMPRESSION */

void	vndthread(void *);

static dev_type_open(vndopen);
static dev_type_close(vndclose);
static dev_type_read(vndread);
static dev_type_write(vndwrite);
static dev_type_ioctl(vndioctl);
static dev_type_strategy(vndstrategy);
static dev_type_dump(vnddump);
static dev_type_size(vndsize);

const struct bdevsw vnd_bdevsw = {
	vndopen, vndclose, vndstrategy, vndioctl, vnddump, vndsize, D_DISK
};

const struct cdevsw vnd_cdevsw = {
	vndopen, vndclose, vndread, vndwrite, vndioctl,
	nostop, notty, nopoll, nommap, nokqfilter, D_DISK
};

static int	vndattached;

void
vndattach(int num)
{
	int i;
	char *mem;

	if (vndattached)
		return;
	vndattached = 1;
	if (num <= 0)
		return;
	i = num * sizeof(struct vnd_softc);
	mem = malloc(i, M_DEVBUF, M_NOWAIT|M_ZERO);
	if (mem == NULL) {
		printf("WARNING: no memory for vnode disks\n");
		return;
	}
	vnd_softc = (struct vnd_softc *)mem;
	numvnd = num;

	for (i = 0; i < numvnd; i++) {
		vnd_softc[i].sc_unit = i;
		vnd_softc[i].sc_comp_offsets = NULL;
		vnd_softc[i].sc_comp_buff = NULL;
		vnd_softc[i].sc_comp_decombuf = NULL;
		bufq_alloc(&vnd_softc[i].sc_tab,
		    BUFQ_DISKSORT|BUFQ_SORT_RAWBLOCK);
		pseudo_disk_init(&vnd_softc[i].sc_dkdev);
	}
}
int
vnddetach(void)
{
	int i;

	/* First check we aren't in use. */
	for (i = 0; i < numvnd; i++)
		if (vnd_softc[i].sc_flags & VNF_INITED)
			return (EBUSY);

	for (i = 0; i < numvnd; i++)
		bufq_free(&vnd_softc[i].sc_tab);

	free(vnd_softc, M_DEVBUF);
	vndattached = 0;

	return (0);
}

static int
vndopen(dev_t dev, int flags, int mode, struct proc *p)
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;
	int error = 0, part, pmask;
	struct disklabel *lp;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndopen(0x%x, 0x%x, 0x%x, %p)\n", dev, flags, mode, p);
#endif
	if (unit >= numvnd)
		return (ENXIO);
	sc = &vnd_softc[unit];

	if ((error = vndlock(sc)) != 0)
		return (error);

	lp = sc->sc_dkdev.dk_label;

	part = DISKPART(dev);
	pmask = (1 << part);

	/*
	 * If we're initialized, check to see if there are any other
	 * open partitions.  If not, then it's safe to update the
	 * in-core disklabel.  Only read the disklabel if it is
	 * not already valid.
	 */
	if ((sc->sc_flags & (VNF_INITED|VNF_VLABEL)) == VNF_INITED &&
	    sc->sc_dkdev.dk_openmask == 0)
		vndgetdisklabel(dev);

	/* Check that the partition exists. */
	if (part != RAW_PART) {
		if (((sc->sc_flags & VNF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
		     (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			goto done;
		}
	}

	/* Prevent our unit from being unconfigured while open. */
	switch (mode) {
	case S_IFCHR:
		sc->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		sc->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}
	sc->sc_dkdev.dk_openmask =
	    sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask;

 done:
	vndunlock(sc);
	return (error);
}

static int
vndclose(dev_t dev, int flags, int mode, struct proc *p)
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;
	int error = 0, part;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndclose(0x%x, 0x%x, 0x%x, %p)\n", dev, flags, mode, p);
#endif

	if (unit >= numvnd)
		return (ENXIO);
	sc = &vnd_softc[unit];

	if ((error = vndlock(sc)) != 0)
		return (error);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (mode) {
	case S_IFCHR:
		sc->sc_dkdev.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		sc->sc_dkdev.dk_bopenmask &= ~(1 << part);
		break;
	}
	sc->sc_dkdev.dk_openmask =
	    sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask;

	vndunlock(sc);
	return (0);
}
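/*
 * Request flow (summary of the code below): vndstrategy() validates the
 * incoming buffer and queues it on sc_tab at splbio, then wakes the
 * per-unit kernel thread.  vndthread() dequeues each buffer, maps it onto
 * the underlying file with VOP_BMAP, splits it into chunks of the backing
 * file system's block size and hands those to VOP_STRATEGY on the
 * underlying device vnode; vndiodone() collects the chunks and completes
 * the original buffer.
 */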
/*
 * Queue the request, and wake up the kernel thread to handle it.
 */
static void
vndstrategy(struct buf *bp)
{
	int unit = vndunit(bp->b_dev);
	struct vnd_softc *vnd = &vnd_softc[unit];
	struct disklabel *lp = vnd->sc_dkdev.dk_label;
	int s = splbio();

	bp->b_resid = bp->b_bcount;

	if ((vnd->sc_flags & VNF_INITED) == 0) {
		bp->b_error = ENXIO;
		bp->b_flags |= B_ERROR;
		goto done;
	}

	/*
	 * The transfer must be a whole number of blocks.
	 */
	if ((bp->b_bcount % lp->d_secsize) != 0) {
		bp->b_error = EINVAL;
		bp->b_flags |= B_ERROR;
		goto done;
	}

	/*
	 * Check if we're read-only.
	 */
	if ((vnd->sc_flags & VNF_READONLY) && !(bp->b_flags & B_READ)) {
		bp->b_error = EACCES;
		bp->b_flags |= B_ERROR;
		goto done;
	}

	/*
	 * Do bounds checking and adjust the transfer.  If there's an error,
	 * the bounds check will flag that for us.
	 */
	if (DISKPART(bp->b_dev) != RAW_PART) {
		if (bounds_check_with_label(&vnd->sc_dkdev,
		    bp, vnd->sc_flags & (VNF_WLABEL|VNF_LABELLING)) <= 0)
			goto done;
	}

	/* If it's a nil transfer, wake up the top half now. */
	if (bp->b_bcount == 0)
		goto done;
#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndstrategy(%p): unit %d\n", bp, unit);
#endif
	BUFQ_PUT(&vnd->sc_tab, bp);
	wakeup(&vnd->sc_tab);
	splx(s);
	return;
 done:
	biodone(bp);
	splx(s);
}

void
vndthread(void *arg)
{
	struct vnd_softc *vnd = arg;
	struct buf *bp;
	struct vndxfer *vnx;
	struct mount *mp;
	int s, bsize, resid;
	off_t bn;
	caddr_t addr;
	int sz, flags, error;
	struct disklabel *lp;
	struct partition *pp;

	s = splbio();
	vnd->sc_flags |= VNF_KTHREAD;
	wakeup(&vnd->sc_kthread);

	/*
	 * Dequeue requests, break them into bsize pieces and submit using
	 * VOP_BMAP/VOP_STRATEGY.
	 */
	while ((vnd->sc_flags & VNF_VUNCONF) == 0) {
		bp = BUFQ_GET(&vnd->sc_tab);
		if (bp == NULL) {
			tsleep(&vnd->sc_tab, PRIBIO, "vndbp", 0);
			continue;
		}
		splx(s);

#ifdef DEBUG
		if (vnddebug & VDB_FOLLOW)
			printf("vndthread(%p)\n", bp);
#endif
		lp = vnd->sc_dkdev.dk_label;
		bp->b_resid = bp->b_bcount;

		/*
		 * Put the block number in terms of the logical blocksize
		 * of the "device".
		 */
		bn = bp->b_blkno / (lp->d_secsize / DEV_BSIZE);

		/*
		 * Translate the partition-relative block number to an
		 * absolute one.
		 */
		if (DISKPART(bp->b_dev) != RAW_PART) {
			pp = &vnd->sc_dkdev.dk_label->d_partitions[
			    DISKPART(bp->b_dev)];
			bn += pp->p_offset;
		}

		/* ...and convert to a byte offset within the file. */
		bn *= lp->d_secsize;

		if (vnd->sc_vp->v_mount == NULL) {
			bp->b_error = ENXIO;
			bp->b_flags |= B_ERROR;
			goto done;
		}
#ifdef VND_COMPRESSION
		/* handle a compressed read */
		if ((bp->b_flags & B_READ) && (vnd->sc_flags & VNF_COMP)) {
			compstrategy(bp, bn);
			goto done;
		}
#endif /* VND_COMPRESSION */

		bsize = vnd->sc_vp->v_mount->mnt_stat.f_iosize;
		addr = bp->b_data;
		flags = (bp->b_flags & (B_READ|B_ASYNC)) | B_CALL;

		/*
		 * Allocate a header for this transfer and link it to the
		 * buffer.
		 */
		s = splbio();
		vnx = VND_GETXFER(vnd);
		splx(s);
		vnx->vx_flags = VX_BUSY;
		vnx->vx_error = 0;
		vnx->vx_pending = 0;
		vnx->vx_bp = bp;

		if ((flags & B_READ) == 0)
			vn_start_write(vnd->sc_vp, &mp, V_WAIT);

		/*
		 * Feed requests sequentially.
		 * We do it this way to keep from flooding NFS servers if we
		 * are connected to an NFS file.  This places the burden on
		 * the client rather than the server.
		 */
		for (resid = bp->b_resid; resid; resid -= sz) {
			struct vndbuf *nbp;
			struct vnode *vp;
			daddr_t nbn;
			int off, nra;

			nra = 0;
			vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY | LK_CANRECURSE);
			error = VOP_BMAP(vnd->sc_vp, bn / bsize, &vp, &nbn, &nra);
			VOP_UNLOCK(vnd->sc_vp, 0);

			if (error == 0 && (long)nbn == -1)
				error = EIO;

			/*
			 * If there was an error or a hole in the file...punt.
			 * Note that we may have to wait for any operations
			 * that we have already fired off before releasing
			 * the buffer.
			 *
			 * XXX we could deal with holes here but it would be
			 * a hassle (in the write case).
			 */
			if (error) {
				s = splbio();
				vnx->vx_error = error;
				goto out;
			}

#ifdef DEBUG
			if (!dovndcluster)
				nra = 0;
#endif

			if ((off = bn % bsize) != 0)
				sz = bsize - off;
			else
				sz = (1 + nra) * bsize;
			if (resid < sz)
				sz = resid;
#ifdef DEBUG
			if (vnddebug & VDB_IO)
				printf("vndstrategy: vp %p/%p bn 0x%qx/0x%" PRIx64
				    " sz 0x%x\n",
				    vnd->sc_vp, vp, (long long)bn, nbn, sz);
#endif

			s = splbio();
			while (vnd->sc_active >= vnd->sc_maxactive) {
				tsleep(&vnd->sc_tab, PRIBIO, "vndac", 0);
			}
			vnd->sc_active++;
			nbp = VND_GETBUF(vnd);
			splx(s);
			BUF_INIT(&nbp->vb_buf);
			nbp->vb_buf.b_flags = flags;
			nbp->vb_buf.b_bcount = sz;
			nbp->vb_buf.b_bufsize = round_page((ulong)addr + sz)
			    - trunc_page((ulong) addr);
			nbp->vb_buf.b_error = 0;
			nbp->vb_buf.b_data = addr;
			nbp->vb_buf.b_blkno = nbp->vb_buf.b_rawblkno = nbn + btodb(off);
			nbp->vb_buf.b_proc = bp->b_proc;
			nbp->vb_buf.b_iodone = vndiodone;
			nbp->vb_buf.b_vp = vp;

			nbp->vb_xfer = vnx;

			BIO_COPYPRIO(&nbp->vb_buf, bp);

			/*
			 * Just sort by block number
			 */
			s = splbio();
			if (vnx->vx_error != 0) {
				VND_PUTBUF(vnd, nbp);
				goto out;
			}
			vnx->vx_pending++;
#ifdef DEBUG
			if (vnddebug & VDB_IO)
				printf("vndstart(%ld): bp %p vp %p blkno "
				    "0x%" PRIx64 " flags %x addr %p cnt 0x%x\n",
				    (long) (vnd-vnd_softc), &nbp->vb_buf,
				    nbp->vb_buf.b_vp, nbp->vb_buf.b_blkno,
				    nbp->vb_buf.b_flags, nbp->vb_buf.b_data,
				    nbp->vb_buf.b_bcount);
#endif

			/* Instrumentation. */
			disk_busy(&vnd->sc_dkdev);

			if ((nbp->vb_buf.b_flags & B_READ) == 0)
				vp->v_numoutput++;
			VOP_STRATEGY(vp, &nbp->vb_buf);

			splx(s);
			bn += sz;
			addr += sz;
		}

		s = splbio();

out:	/* Arrive here at splbio */
		if ((flags & B_READ) == 0)
			vn_finished_write(mp, 0);
		vnx->vx_flags &= ~VX_BUSY;
		if (vnx->vx_pending == 0) {
			if (vnx->vx_error != 0) {
				bp->b_error = vnx->vx_error;
				bp->b_flags |= B_ERROR;
			}
			VND_PUTXFER(vnd, vnx);
			biodone(bp);
		}
		continue;
done:
		biodone(bp);
		s = splbio();
	}

	vnd->sc_flags &= (~VNF_KTHREAD | VNF_VUNCONF);
	wakeup(&vnd->sc_kthread);
	splx(s);
	kthread_exit(0);
}

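/*
 * Interrupt-time completion handler for the per-chunk buffers issued by
 * vndthread(): fold the chunk's residual count and any error into the
 * parent transfer (struct vndxfer) and biodone() the original buffer once
 * the thread has finished issuing chunks (VX_BUSY cleared) and the last
 * outstanding chunk has returned.
 */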
static void
vndiodone(struct buf *bp)
{
	struct vndbuf *vbp = (struct vndbuf *) bp;
	struct vndxfer *vnx = (struct vndxfer *)vbp->vb_xfer;
	struct buf *pbp = vnx->vx_bp;
	struct vnd_softc *vnd = &vnd_softc[vndunit(pbp->b_dev)];
	int s, resid;

	s = splbio();
#ifdef DEBUG
	if (vnddebug & VDB_IO)
		printf("vndiodone(%ld): vbp %p vp %p blkno 0x%" PRIx64
		    " addr %p cnt 0x%x\n",
		    (long) (vnd-vnd_softc), vbp, vbp->vb_buf.b_vp,
		    vbp->vb_buf.b_blkno, vbp->vb_buf.b_data,
		    vbp->vb_buf.b_bcount);
#endif

	resid = vbp->vb_buf.b_bcount - vbp->vb_buf.b_resid;
	pbp->b_resid -= resid;
	disk_unbusy(&vnd->sc_dkdev, resid, (pbp->b_flags & B_READ));
	vnx->vx_pending--;

	if (vbp->vb_buf.b_error) {
#ifdef DEBUG
		if (vnddebug & VDB_IO)
			printf("vndiodone: vbp %p error %d\n", vbp,
			    vbp->vb_buf.b_error);
#endif
		vnx->vx_error = vbp->vb_buf.b_error;
	}

	VND_PUTBUF(vnd, vbp);

	/*
	 * Wrap up this transaction if it has run to completion or, in
	 * case of an error, when all auxiliary buffers have returned.
	 */
	if (vnx->vx_error != 0) {
		pbp->b_flags |= B_ERROR;
		pbp->b_error = vnx->vx_error;
		if ((vnx->vx_flags & VX_BUSY) == 0 && vnx->vx_pending == 0) {

#ifdef DEBUG
			if (vnddebug & VDB_IO)
				printf("vndiodone: pbp %p iodone: error %d\n",
				    pbp, vnx->vx_error);
#endif
			VND_PUTXFER(vnd, vnx);
			biodone(pbp);
		}
	} else if (pbp->b_resid == 0) {

#ifdef DIAGNOSTIC
		if (vnx->vx_pending != 0)
			panic("vndiodone: vnx pending: %d", vnx->vx_pending);
#endif

		if ((vnx->vx_flags & VX_BUSY) == 0) {
#ifdef DEBUG
			if (vnddebug & VDB_IO)
				printf("vndiodone: pbp %p iodone\n", pbp);
#endif
			VND_PUTXFER(vnd, vnx);
			biodone(pbp);
		}
	}

	vnd->sc_active--;
	wakeup(&vnd->sc_tab);
	splx(s);
}

/* ARGSUSED */
static int
vndread(dev_t dev, struct uio *uio, int flags)
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndread(0x%x, %p)\n", dev, uio);
#endif

	if (unit >= numvnd)
		return (ENXIO);
	sc = &vnd_softc[unit];

	if ((sc->sc_flags & VNF_INITED) == 0)
		return (ENXIO);

	return (physio(vndstrategy, NULL, dev, B_READ, minphys, uio));
}

/* ARGSUSED */
static int
vndwrite(dev_t dev, struct uio *uio, int flags)
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndwrite(0x%x, %p)\n", dev, uio);
#endif

	if (unit >= numvnd)
		return (ENXIO);
	sc = &vnd_softc[unit];

	if ((sc->sc_flags & VNF_INITED) == 0)
		return (ENXIO);

	return (physio(vndstrategy, NULL, dev, B_WRITE, minphys, uio));
}

static int
vnd_cget(struct proc *p, int unit, int *un, struct vattr *va)
{
	struct vnd_softc *vnd;

	if (*un == -1)
		*un = unit;
	if (*un >= numvnd)
		return ENXIO;
	if (*un < 0)
		return EINVAL;

	vnd = &vnd_softc[*un];

	if ((vnd->sc_flags & VNF_INITED) == 0)
		return -1;

	return VOP_GETATTR(vnd->sc_vp, va, p->p_ucred, p);
}

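/*
 * Rough layout of a compressed (VNF_COMP) image as consumed by the
 * VNDIOCSET handler below; this is a sketch inferred from the code, the
 * authoritative definition is struct vnd_comp_header in <dev/vndvar.h>:
 *
 *	header:		block_size and num_blocks, stored in network
 *			byte order
 *	offset table:	(num_blocks + 1) big-endian u_int64_t file
 *			offsets, one per compressed block; the final
 *			entry is the total file size, so consecutive
 *			entries bound each compressed block
 *	data:		each block is an independent zlib stream that
 *			compstrategy() inflates into a block_size sized
 *			buffer on demand
 */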
/* ARGSUSED */
static int
vndioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
{
	int unit = vndunit(dev);
	struct vnd_softc *vnd;
	struct vnd_ioctl *vio;
	struct vattr vattr;
	struct nameidata nd;
	int error, part, pmask;
	size_t geomsize;
	int fflags;
#ifdef __HAVE_OLD_DISKLABEL
	struct disklabel newlabel;
#endif

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndioctl(0x%x, 0x%lx, %p, 0x%x, %p): unit %d\n",
		    dev, cmd, data, flag, p, unit);
#endif
	if (unit >= numvnd)
		return (ENXIO);

	vnd = &vnd_softc[unit];
	vio = (struct vnd_ioctl *)data;

	/* Must be open for writes for these commands... */
	switch (cmd) {
	case VNDIOCSET:
	case VNDIOCCLR:
	case DIOCSDINFO:
	case DIOCWDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCSDINFO:
	case ODIOCWDINFO:
#endif
	case DIOCKLABEL:
	case DIOCWLABEL:
		if ((flag & FWRITE) == 0)
			return (EBADF);
	}

	/* Must be initialized for these... */
	switch (cmd) {
	case VNDIOCCLR:
	case DIOCGDINFO:
	case DIOCSDINFO:
	case DIOCWDINFO:
	case DIOCGPART:
	case DIOCKLABEL:
	case DIOCWLABEL:
	case DIOCGDEFLABEL:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDINFO:
	case ODIOCSDINFO:
	case ODIOCWDINFO:
	case ODIOCGDEFLABEL:
#endif
		if ((vnd->sc_flags & VNF_INITED) == 0)
			return (ENXIO);
	}

	switch (cmd) {
	case VNDIOCSET:
		if (vnd->sc_flags & VNF_INITED)
			return (EBUSY);

		if ((error = vndlock(vnd)) != 0)
			return (error);

		fflags = FREAD;
		if ((vio->vnd_flags & VNDIOF_READONLY) == 0)
			fflags |= FWRITE;
		NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, vio->vnd_file, p);
		if ((error = vn_open(&nd, fflags, 0)) != 0)
			goto unlock_and_exit;
		error = VOP_GETATTR(nd.ni_vp, &vattr, p->p_ucred, p);
		if (!error && nd.ni_vp->v_type != VREG)
			error = EOPNOTSUPP;
		if (error) {
			VOP_UNLOCK(nd.ni_vp, 0);
			goto close_and_exit;
		}

		/* If using a compressed file, initialize its info */
		/* (or abort with an error if kernel has no compression) */
		if (vio->vnd_flags & VNF_COMP) {
#ifdef VND_COMPRESSION
			struct vnd_comp_header *ch;
			int i;
			u_int32_t comp_size;
			u_int32_t comp_maxsize;

			/* allocate space for compressed file header */
			ch = malloc(sizeof(struct vnd_comp_header),
			    M_TEMP, M_WAITOK);

			/* read compressed file header */
			error = vn_rdwr(UIO_READ, nd.ni_vp, (caddr_t)ch,
			    sizeof(struct vnd_comp_header), 0, UIO_SYSSPACE,
			    IO_UNIT|IO_NODELOCKED, p->p_ucred, NULL, NULL);
			if (error) {
				free(ch, M_TEMP);
				VOP_UNLOCK(nd.ni_vp, 0);
				goto close_and_exit;
			}

			/* save some header info */
			vnd->sc_comp_blksz = ntohl(ch->block_size);
			/* note last offset is the file byte size */
			vnd->sc_comp_numoffs = ntohl(ch->num_blocks) + 1;
			free(ch, M_TEMP);
			if (vnd->sc_comp_blksz % DEV_BSIZE != 0) {
				VOP_UNLOCK(nd.ni_vp, 0);
				error = EINVAL;
				goto close_and_exit;
			}
			if (sizeof(struct vnd_comp_header) +
			    sizeof(u_int64_t) * vnd->sc_comp_numoffs >
			    vattr.va_size) {
				VOP_UNLOCK(nd.ni_vp, 0);
				error = EINVAL;
				goto close_and_exit;
			}

			/* set decompressed file size */
			vattr.va_size =
			    (vnd->sc_comp_numoffs - 1) * vnd->sc_comp_blksz;

			/* allocate space for all the compressed offsets */
			vnd->sc_comp_offsets =
			    malloc(sizeof(u_int64_t) * vnd->sc_comp_numoffs,
			    M_DEVBUF, M_WAITOK);

			/* read in the offsets */
			error = vn_rdwr(UIO_READ, nd.ni_vp,
			    (caddr_t)vnd->sc_comp_offsets,
			    sizeof(u_int64_t) * vnd->sc_comp_numoffs,
			    sizeof(struct vnd_comp_header), UIO_SYSSPACE,
			    IO_UNIT|IO_NODELOCKED, p->p_ucred, NULL, NULL);
			if (error) {
				VOP_UNLOCK(nd.ni_vp, 0);
				goto close_and_exit;
			}
			/*
			 * Find the largest block size (used for the
			 * allocation limit).  Also convert the offsets to
			 * native byte order.
			 */
			comp_maxsize = 0;
			for (i = 0; i < vnd->sc_comp_numoffs - 1; i++) {
				vnd->sc_comp_offsets[i] =
				    be64toh(vnd->sc_comp_offsets[i]);
				comp_size = be64toh(vnd->sc_comp_offsets[i + 1])
				    - vnd->sc_comp_offsets[i];
				if (comp_size > comp_maxsize)
					comp_maxsize = comp_size;
			}
			vnd->sc_comp_offsets[vnd->sc_comp_numoffs - 1] =
			    be64toh(vnd->sc_comp_offsets[vnd->sc_comp_numoffs - 1]);

			/* create compressed data buffer */
			vnd->sc_comp_buff = malloc(comp_maxsize,
			    M_DEVBUF, M_WAITOK);

			/* create decompressed buffer */
			vnd->sc_comp_decombuf = malloc(vnd->sc_comp_blksz,
			    M_DEVBUF, M_WAITOK);
			vnd->sc_comp_buffblk = -1;

			/* Initialize decompress stream */
			bzero(&vnd->sc_comp_stream, sizeof(z_stream));
			vnd->sc_comp_stream.zalloc = vnd_alloc;
			vnd->sc_comp_stream.zfree = vnd_free;
			error = inflateInit2(&vnd->sc_comp_stream, MAX_WBITS);
			if (error) {
				if (vnd->sc_comp_stream.msg)
					printf("vnd%d: compressed file, %s\n",
					    unit, vnd->sc_comp_stream.msg);
				VOP_UNLOCK(nd.ni_vp, 0);
				error = EINVAL;
				goto close_and_exit;
			}

			vnd->sc_flags |= VNF_COMP | VNF_READONLY;
#else /* !VND_COMPRESSION */
			VOP_UNLOCK(nd.ni_vp, 0);
			error = EOPNOTSUPP;
			goto close_and_exit;
#endif /* VND_COMPRESSION */
		}

		VOP_UNLOCK(nd.ni_vp, 0);
		vnd->sc_vp = nd.ni_vp;
		vnd->sc_size = btodb(vattr.va_size);	/* note truncation */

		/*
		 * Use the pseudo-geometry specified, if any.  If none was
		 * provided, use the "standard" Adaptec fictitious geometry.
		 */
		if (vio->vnd_flags & VNDIOF_HASGEOM) {

			memcpy(&vnd->sc_geom, &vio->vnd_geom,
			    sizeof(vio->vnd_geom));

			/*
			 * Sanity-check the sector size.
			 * XXX Don't allow secsize < DEV_BSIZE.  Should
			 * XXX we?
			 */
			if (vnd->sc_geom.vng_secsize < DEV_BSIZE ||
			    (vnd->sc_geom.vng_secsize % DEV_BSIZE) != 0 ||
			    vnd->sc_geom.vng_ncylinders == 0 ||
			    (vnd->sc_geom.vng_ntracks *
			     vnd->sc_geom.vng_nsectors) == 0) {
				error = EINVAL;
				goto close_and_exit;
			}

			/*
			 * Compute the size (in DEV_BSIZE blocks) specified
			 * by the geometry.
			 */
			geomsize = (vnd->sc_geom.vng_nsectors *
			    vnd->sc_geom.vng_ntracks *
			    vnd->sc_geom.vng_ncylinders) *
			    (vnd->sc_geom.vng_secsize / DEV_BSIZE);

			/*
			 * Sanity-check the size against the specified
			 * geometry.
			 */
			if (vnd->sc_size < geomsize) {
				error = EINVAL;
				goto close_and_exit;
			}
		} else if (vnd->sc_size >= (32 * 64)) {
			/*
			 * Size must be at least 2048 DEV_BSIZE blocks
			 * (1M) in order to use this geometry.
			 */
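			/*
			 * Worked example (illustrative numbers only): a
			 * 1 GiB (2^30 byte) image gives sc_size = 2097152
			 * DEV_BSIZE blocks, hence 32 sectors/track,
			 * 64 tracks/cylinder and 2097152 / (32 * 64) =
			 * 1024 cylinders.
			 */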
			vnd->sc_geom.vng_secsize = DEV_BSIZE;
			vnd->sc_geom.vng_nsectors = 32;
			vnd->sc_geom.vng_ntracks = 64;
			vnd->sc_geom.vng_ncylinders = vnd->sc_size / (64 * 32);
		} else {
			vnd->sc_geom.vng_secsize = DEV_BSIZE;
			vnd->sc_geom.vng_nsectors = 1;
			vnd->sc_geom.vng_ntracks = 1;
			vnd->sc_geom.vng_ncylinders = vnd->sc_size;
		}

		if (vio->vnd_flags & VNDIOF_READONLY) {
			vnd->sc_flags |= VNF_READONLY;
		}

		if ((error = vndsetcred(vnd, p->p_ucred)) != 0)
			goto close_and_exit;

		memset(vnd->sc_xname, 0, sizeof(vnd->sc_xname));	/* XXX */
		snprintf(vnd->sc_xname, sizeof(vnd->sc_xname), "vnd%d", unit);

		vndthrottle(vnd, vnd->sc_vp);
		vio->vnd_size = dbtob(vnd->sc_size);
		vnd->sc_flags |= VNF_INITED;

		/* create the kernel thread, wait for it to be up */
		error = kthread_create1(vndthread, vnd, &vnd->sc_kthread,
		    vnd->sc_xname);
		if (error)
			goto close_and_exit;
		while ((vnd->sc_flags & VNF_KTHREAD) == 0) {
			tsleep(&vnd->sc_kthread, PRIBIO, "vndthr", 0);
		}
#ifdef DEBUG
		if (vnddebug & VDB_INIT)
			printf("vndioctl: SET vp %p size 0x%lx %d/%d/%d/%d\n",
			    vnd->sc_vp, (unsigned long) vnd->sc_size,
			    vnd->sc_geom.vng_secsize,
			    vnd->sc_geom.vng_nsectors,
			    vnd->sc_geom.vng_ntracks,
			    vnd->sc_geom.vng_ncylinders);
#endif

		/* Attach the disk. */
		vnd->sc_dkdev.dk_name = vnd->sc_xname;
		pseudo_disk_attach(&vnd->sc_dkdev);

		/* Initialize the xfer and buffer pools. */
		pool_init(&vnd->sc_vxpool, sizeof(struct vndxfer), 0,
		    0, 0, "vndxpl", NULL);
		pool_init(&vnd->sc_vbpool, sizeof(struct vndbuf), 0,
		    0, 0, "vndbpl", NULL);

		/* Try and read the disklabel. */
		vndgetdisklabel(dev);

		vndunlock(vnd);

		break;

close_and_exit:
		(void) vn_close(nd.ni_vp, fflags, p->p_ucred, p);
unlock_and_exit:
#ifdef VND_COMPRESSION
		/* free any allocated memory (for compressed file) */
		if (vnd->sc_comp_offsets) {
			free(vnd->sc_comp_offsets, M_DEVBUF);
			vnd->sc_comp_offsets = NULL;
		}
		if (vnd->sc_comp_buff) {
			free(vnd->sc_comp_buff, M_DEVBUF);
			vnd->sc_comp_buff = NULL;
		}
		if (vnd->sc_comp_decombuf) {
			free(vnd->sc_comp_decombuf, M_DEVBUF);
			vnd->sc_comp_decombuf = NULL;
		}
#endif /* VND_COMPRESSION */
		vndunlock(vnd);
		return (error);

	case VNDIOCCLR:
		if ((error = vndlock(vnd)) != 0)
			return (error);

		/*
		 * Don't unconfigure if any other partitions are open
		 * or if both the character and block flavors of this
		 * partition are open.
		 */
		part = DISKPART(dev);
		pmask = (1 << part);
		if (((vnd->sc_dkdev.dk_openmask & ~pmask) ||
		    ((vnd->sc_dkdev.dk_bopenmask & pmask) &&
		     (vnd->sc_dkdev.dk_copenmask & pmask))) &&
		    !(vio->vnd_flags & VNDIOF_FORCE)) {
			vndunlock(vnd);
			return (EBUSY);
		}

		/*
		 * XXX vndclear() might call vndclose() implicitly;
		 * release the lock to avoid recursion.
		 */
		vndunlock(vnd);
		vndclear(vnd, minor(dev));
#ifdef DEBUG
		if (vnddebug & VDB_INIT)
			printf("vndioctl: CLRed\n");
#endif

		/* Destroy the xfer and buffer pools. */
		pool_destroy(&vnd->sc_vxpool);
		pool_destroy(&vnd->sc_vbpool);

		/* Detach the disk. */
		pseudo_disk_detach(&vnd->sc_dkdev);

		break;

#ifdef COMPAT_30
	case VNDIOOCGET: {
		struct vnd_ouser *vnu;
		struct vattr va;
		vnu = (struct vnd_ouser *)data;
		switch (error = vnd_cget(p, unit, &vnu->vnu_unit, &va)) {
		case 0:
			vnu->vnu_dev = va.va_fsid;
			vnu->vnu_ino = va.va_fileid;
			break;
		case -1:
			/* unused is not an error */
			vnu->vnu_dev = 0;
			vnu->vnu_ino = 0;
			break;
		default:
			return error;
		}
		break;
	}
#endif
	case VNDIOCGET: {
		struct vnd_user *vnu;
		struct vattr va;
		vnu = (struct vnd_user *)data;
		switch (error = vnd_cget(p, unit, &vnu->vnu_unit, &va)) {
		case 0:
			vnu->vnu_dev = va.va_fsid;
			vnu->vnu_ino = va.va_fileid;
			break;
		case -1:
			/* unused is not an error */
			vnu->vnu_dev = 0;
			vnu->vnu_ino = 0;
			break;
		default:
			return error;
		}
		break;
	}

	case DIOCGDINFO:
		*(struct disklabel *)data = *(vnd->sc_dkdev.dk_label);
		break;

#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDINFO:
		newlabel = *(vnd->sc_dkdev.dk_label);
		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
			return ENOTTY;
		memcpy(data, &newlabel, sizeof (struct olddisklabel));
		break;
#endif

	case DIOCGPART:
		((struct partinfo *)data)->disklab = vnd->sc_dkdev.dk_label;
		((struct partinfo *)data)->part =
		    &vnd->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
		break;

	case DIOCWDINFO:
	case DIOCSDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCWDINFO:
	case ODIOCSDINFO:
#endif
	{
		struct disklabel *lp;

		if ((error = vndlock(vnd)) != 0)
			return (error);

		vnd->sc_flags |= VNF_LABELLING;

#ifdef __HAVE_OLD_DISKLABEL
		if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
			memset(&newlabel, 0, sizeof newlabel);
			memcpy(&newlabel, data, sizeof (struct olddisklabel));
			lp = &newlabel;
		} else
#endif
		lp = (struct disklabel *)data;

		error = setdisklabel(vnd->sc_dkdev.dk_label,
		    lp, 0, vnd->sc_dkdev.dk_cpulabel);
		if (error == 0) {
			if (cmd == DIOCWDINFO
#ifdef __HAVE_OLD_DISKLABEL
			    || cmd == ODIOCWDINFO
#endif
			   )
				error = writedisklabel(VNDLABELDEV(dev),
				    vndstrategy, vnd->sc_dkdev.dk_label,
				    vnd->sc_dkdev.dk_cpulabel);
		}

		vnd->sc_flags &= ~VNF_LABELLING;

		vndunlock(vnd);

		if (error)
			return (error);
		break;
	}

	case DIOCKLABEL:
		if (*(int *)data != 0)
			vnd->sc_flags |= VNF_KLABEL;
		else
			vnd->sc_flags &= ~VNF_KLABEL;
		break;

	case DIOCWLABEL:
		if (*(int *)data != 0)
			vnd->sc_flags |= VNF_WLABEL;
		else
			vnd->sc_flags &= ~VNF_WLABEL;
		break;

	case DIOCGDEFLABEL:
		vndgetdefaultlabel(vnd, (struct disklabel *)data);
		break;

#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDEFLABEL:
		vndgetdefaultlabel(vnd, &newlabel);
		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
			return ENOTTY;
		memcpy(data, &newlabel, sizeof (struct olddisklabel));
		break;
#endif

	default:
		return (ENOTTY);
	}

	return (0);
}

/*
 * Duplicate the current process's credentials.  Since we are called only
 * as the result of a SET ioctl and only root can do that, any future access
 * to this "disk" is essentially as root.  Note that credentials may change
 * if some other uid can write directly to the mapped file (NFS).
 */
static int
vndsetcred(struct vnd_softc *vnd, struct ucred *cred)
{
	struct uio auio;
	struct iovec aiov;
	char *tmpbuf;
	int error;

	vnd->sc_cred = crdup(cred);
	tmpbuf = malloc(DEV_BSIZE, M_TEMP, M_WAITOK);

	/* XXX: Horrible kludge to establish credentials for NFS */
	aiov.iov_base = tmpbuf;
	aiov.iov_len = min(DEV_BSIZE, dbtob(vnd->sc_size));
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = 0;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_resid = aiov.iov_len;
	vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY);
	error = VOP_READ(vnd->sc_vp, &auio, 0, vnd->sc_cred);
	if (error == 0) {
		/*
		 * Because vnd does all IO directly through the vnode
		 * we need to flush (at least) the buffer from the above
		 * VOP_READ from the buffer cache to prevent cache
		 * incoherencies.  Also, be careful to write dirty
		 * buffers back to stable storage.
		 */
		error = vinvalbuf(vnd->sc_vp, V_SAVE, vnd->sc_cred,
		    curproc, 0, 0);
	}
	VOP_UNLOCK(vnd->sc_vp, 0);

	free(tmpbuf, M_TEMP);
	return (error);
}

/*
 * Set maxactive based on FS type
 */
static void
vndthrottle(struct vnd_softc *vnd, struct vnode *vp)
{
#ifdef NFS
	extern int (**nfsv2_vnodeop_p)(void *);

	if (vp->v_op == nfsv2_vnodeop_p)
		vnd->sc_maxactive = 2;
	else
#endif
		vnd->sc_maxactive = 8;

	if (vnd->sc_maxactive < 1)
		vnd->sc_maxactive = 1;
}

#if 0
static void
vndshutdown(void)
{
	struct vnd_softc *vnd;

	for (vnd = &vnd_softc[0]; vnd < &vnd_softc[numvnd]; vnd++)
		if (vnd->sc_flags & VNF_INITED)
			vndclear(vnd);
}
#endif

static void
vndclear(struct vnd_softc *vnd, int myminor)
{
	struct vnode *vp = vnd->sc_vp;
	struct proc *p = curproc;		/* XXX */
	int fflags = FREAD;
	int bmaj, cmaj, i, mn;
	int s;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndclear(%p): vp %p\n", vnd, vp);
#endif
	/* locate the major number */
	bmaj = bdevsw_lookup_major(&vnd_bdevsw);
	cmaj = cdevsw_lookup_major(&vnd_cdevsw);

	/* Nuke the vnodes for any open instances */
	for (i = 0; i < MAXPARTITIONS; i++) {
		mn = DISKMINOR(vnd->sc_unit, i);
		vdevgone(bmaj, mn, mn, VBLK);
		if (mn != myminor)	/* XXX avoid killing our own vnode */
			vdevgone(cmaj, mn, mn, VCHR);
	}

	if ((vnd->sc_flags & VNF_READONLY) == 0)
		fflags |= FWRITE;

	s = splbio();
	bufq_drain(&vnd->sc_tab);
	splx(s);

	vnd->sc_flags |= VNF_VUNCONF;
	wakeup(&vnd->sc_tab);
	while (vnd->sc_flags & VNF_KTHREAD)
		tsleep(&vnd->sc_kthread, PRIBIO, "vnthr", 0);

#ifdef VND_COMPRESSION
	/* free the compressed file buffers */
	if (vnd->sc_flags & VNF_COMP) {
		if (vnd->sc_comp_offsets) {
			free(vnd->sc_comp_offsets, M_DEVBUF);
			vnd->sc_comp_offsets = NULL;
		}
		if (vnd->sc_comp_buff) {
			free(vnd->sc_comp_buff, M_DEVBUF);
			vnd->sc_comp_buff = NULL;
		}
		if (vnd->sc_comp_decombuf) {
			free(vnd->sc_comp_decombuf, M_DEVBUF);
			vnd->sc_comp_decombuf = NULL;
		}
	}
#endif /* VND_COMPRESSION */
	vnd->sc_flags &=
	    ~(VNF_INITED | VNF_READONLY | VNF_VLABEL
	      | VNF_VUNCONF | VNF_COMP);
	if (vp == (struct vnode *)0)
		panic("vndclear: null vp");
	(void) vn_close(vp, fflags, vnd->sc_cred, p);
	crfree(vnd->sc_cred);
	vnd->sc_vp = (struct vnode *)0;
	vnd->sc_cred = (struct ucred *)0;
	vnd->sc_size = 0;
}

static int
vndsize(dev_t dev)
{
	struct vnd_softc *sc;
	struct disklabel *lp;
	int part, unit, omask;
	int size;

	unit = vndunit(dev);
	if (unit >= numvnd)
		return (-1);
	sc = &vnd_softc[unit];

	if ((sc->sc_flags & VNF_INITED) == 0)
		return (-1);

	part = DISKPART(dev);
	omask = sc->sc_dkdev.dk_openmask & (1 << part);
	lp = sc->sc_dkdev.dk_label;

	if (omask == 0 && vndopen(dev, 0, S_IFBLK, curproc))
		return (-1);

	if (lp->d_partitions[part].p_fstype != FS_SWAP)
		size = -1;
	else
		size = lp->d_partitions[part].p_size *
		    (lp->d_secsize / DEV_BSIZE);

	if (omask == 0 && vndclose(dev, 0, S_IFBLK, curproc))
		return (-1);

	return (size);
}

static int
vnddump(dev_t dev, daddr_t blkno, caddr_t va, size_t size)
{

	/* Not implemented. */
	return ENXIO;
}

static void
vndgetdefaultlabel(struct vnd_softc *sc, struct disklabel *lp)
{
	struct vndgeom *vng = &sc->sc_geom;
	struct partition *pp;

	memset(lp, 0, sizeof(*lp));

	lp->d_secperunit = sc->sc_size / (vng->vng_secsize / DEV_BSIZE);
	lp->d_secsize = vng->vng_secsize;
	lp->d_nsectors = vng->vng_nsectors;
	lp->d_ntracks = vng->vng_ntracks;
	lp->d_ncylinders = vng->vng_ncylinders;
	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;

	strncpy(lp->d_typename, "vnd", sizeof(lp->d_typename));
	lp->d_type = DTYPE_VND;
	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
	lp->d_rpm = 3600;
	lp->d_interleave = 1;
	lp->d_flags = 0;

	pp = &lp->d_partitions[RAW_PART];
	pp->p_offset = 0;
	pp->p_size = lp->d_secperunit;
	pp->p_fstype = FS_UNUSED;
	lp->d_npartitions = RAW_PART + 1;

	lp->d_magic = DISKMAGIC;
	lp->d_magic2 = DISKMAGIC;
	lp->d_checksum = dkcksum(lp);
}

/*
 * Read the disklabel from a vnd.  If one is not present, create a fake one.
 */
static void
vndgetdisklabel(dev_t dev)
{
	struct vnd_softc *sc = &vnd_softc[vndunit(dev)];
	const char *errstring;
	struct disklabel *lp = sc->sc_dkdev.dk_label;
	struct cpu_disklabel *clp = sc->sc_dkdev.dk_cpulabel;
	int i;

	memset(clp, 0, sizeof(*clp));

	vndgetdefaultlabel(sc, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(VNDLABELDEV(dev), vndstrategy, lp, clp);
	if (errstring) {
		/*
		 * Lack of a disklabel is common, but we print the warning
		 * anyway, since it might contain other useful information.
		 */
		printf("%s: %s\n", sc->sc_xname, errstring);

		/*
		 * For historical reasons, if there's no disklabel
		 * present, all partitions must be FS_BSDFFS and
		 * occupy the entire disk.
		 */
		for (i = 0; i < MAXPARTITIONS; i++) {
			/*
			 * Don't wipe out port-specific hacks (such as the
			 * DOS partition hack of the i386 port).
			 */
			if (lp->d_partitions[i].p_size != 0)
				continue;

			lp->d_partitions[i].p_size = lp->d_secperunit;
			lp->d_partitions[i].p_offset = 0;
			lp->d_partitions[i].p_fstype = FS_BSDFFS;
		}

		strncpy(lp->d_packname, "default label",
		    sizeof(lp->d_packname));

		lp->d_npartitions = MAXPARTITIONS;
		lp->d_checksum = dkcksum(lp);
	}

	/* In-core label now valid. */
	sc->sc_flags |= VNF_VLABEL;
}

/*
 * Wait interruptibly for an exclusive lock.
 *
 * XXX
 * Several drivers do this; it should be abstracted and made MP-safe.
 */
static int
vndlock(struct vnd_softc *sc)
{
	int error;

	while ((sc->sc_flags & VNF_LOCKED) != 0) {
		sc->sc_flags |= VNF_WANTED;
		if ((error = tsleep(sc, PRIBIO | PCATCH, "vndlck", 0)) != 0)
			return (error);
	}
	sc->sc_flags |= VNF_LOCKED;
	return (0);
}

/*
 * Unlock and wake up any waiters.
 */
static void
vndunlock(struct vnd_softc *sc)
{

	sc->sc_flags &= ~VNF_LOCKED;
	if ((sc->sc_flags & VNF_WANTED) != 0) {
		sc->sc_flags &= ~VNF_WANTED;
		wakeup(sc);
	}
}

#ifdef VND_COMPRESSION
/* compressed file read */
static void
compstrategy(struct buf *bp, off_t bn)
{
	int error;
	int unit = vndunit(bp->b_dev);
	struct vnd_softc *vnd = &vnd_softc[unit];
	u_int32_t comp_block;
	struct uio auio;
	caddr_t addr;
	int s;

	/* set up constants for data move */
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = bp->b_flags & B_PHYS ? UIO_USERSPACE : UIO_SYSSPACE;
	auio.uio_procp = bp->b_proc;

	/* read, and transfer the data */
	addr = bp->b_data;
	s = splbio();
	while (bp->b_resid > 0) {
		unsigned length;
		size_t length_in_buffer;
		u_int32_t offset_in_buffer;
		struct iovec aiov;

		/* calculate the compressed block number */
		comp_block = bn / (off_t)vnd->sc_comp_blksz;

		/* check for good block number */
		if (comp_block >= vnd->sc_comp_numoffs) {
			bp->b_error = EINVAL;
			bp->b_flags |= B_ERROR;
			splx(s);
			return;
		}

		/* read in the compressed block, if not in buffer */
		if (comp_block != vnd->sc_comp_buffblk) {
			length = vnd->sc_comp_offsets[comp_block + 1] -
			    vnd->sc_comp_offsets[comp_block];
			vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY);
			error = vn_rdwr(UIO_READ, vnd->sc_vp, vnd->sc_comp_buff,
			    length, vnd->sc_comp_offsets[comp_block],
			    UIO_SYSSPACE, IO_UNIT, vnd->sc_cred, NULL, NULL);
			if (error) {
				bp->b_error = error;
				bp->b_flags |= B_ERROR;
				VOP_UNLOCK(vnd->sc_vp, 0);
				splx(s);
				return;
			}
			/* uncompress the buffer */
			vnd->sc_comp_stream.next_in = vnd->sc_comp_buff;
			vnd->sc_comp_stream.avail_in = length;
			vnd->sc_comp_stream.next_out = vnd->sc_comp_decombuf;
			vnd->sc_comp_stream.avail_out = vnd->sc_comp_blksz;
			inflateReset(&vnd->sc_comp_stream);
			error = inflate(&vnd->sc_comp_stream, Z_FINISH);
			if (error != Z_STREAM_END) {
				if (vnd->sc_comp_stream.msg)
					printf("%s: compressed file, %s\n",
					    vnd->sc_xname,
					    vnd->sc_comp_stream.msg);
				bp->b_error = EBADMSG;
				bp->b_flags |= B_ERROR;
				VOP_UNLOCK(vnd->sc_vp, 0);
				splx(s);
				return;
			}
			vnd->sc_comp_buffblk = comp_block;
			VOP_UNLOCK(vnd->sc_vp, 0);
		}

		/* transfer the usable uncompressed data */
		offset_in_buffer = bn % (off_t)vnd->sc_comp_blksz;
		length_in_buffer = vnd->sc_comp_blksz - offset_in_buffer;
		if (length_in_buffer > bp->b_resid)
			length_in_buffer = bp->b_resid;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		aiov.iov_base = addr;
		aiov.iov_len = length_in_buffer;
		auio.uio_resid = aiov.iov_len;
		auio.uio_offset = 0;
		error = uiomove(vnd->sc_comp_decombuf + offset_in_buffer,
		    length_in_buffer, &auio);
		if (error) {
			bp->b_error = error;
			bp->b_flags |= B_ERROR;
			splx(s);
			return;
		}

		bn += length_in_buffer;
		addr += length_in_buffer;
		bp->b_resid -= length_in_buffer;
	}
	splx(s);
}

/* compression memory allocation routines */
static void *
vnd_alloc(void *aux, u_int items, u_int siz)
{
	return malloc(items * siz, M_TEMP, M_NOWAIT);
}

static void
vnd_free(void *aux, void *ptr)
{
	free(ptr, M_TEMP);
}
#endif /* VND_COMPRESSION */