1 /* $NetBSD: vnd.c,v 1.118 2005/07/25 13:25:08 drochner Exp $ */ 2 3 /*- 4 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the NetBSD 21 * Foundation, Inc. and its contributors. 22 * 4. Neither the name of The NetBSD Foundation nor the names of its 23 * contributors may be used to endorse or promote products derived 24 * from this software without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 36 * POSSIBILITY OF SUCH DAMAGE. 37 */ 38 39 /* 40 * Copyright (c) 1990, 1993 41 * The Regents of the University of California. All rights reserved. 42 * 43 * This code is derived from software contributed to Berkeley by 44 * the Systems Programming Group of the University of Utah Computer 45 * Science Department. 46 * 47 * Redistribution and use in source and binary forms, with or without 48 * modification, are permitted provided that the following conditions 49 * are met: 50 * 1. Redistributions of source code must retain the above copyright 51 * notice, this list of conditions and the following disclaimer. 52 * 2. Redistributions in binary form must reproduce the above copyright 53 * notice, this list of conditions and the following disclaimer in the 54 * documentation and/or other materials provided with the distribution. 55 * 3. Neither the name of the University nor the names of its contributors 56 * may be used to endorse or promote products derived from this software 57 * without specific prior written permission. 58 * 59 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 60 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 61 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 62 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 63 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 64 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 65 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 66 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 67 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 68 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 69 * SUCH DAMAGE. 70 * 71 * from: Utah $Hdr: vn.c 1.13 94/04/02$ 72 * 73 * @(#)vn.c 8.9 (Berkeley) 5/14/95 74 */ 75 76 /* 77 * Copyright (c) 1988 University of Utah. 78 * 79 * This code is derived from software contributed to Berkeley by 80 * the Systems Programming Group of the University of Utah Computer 81 * Science Department. 82 * 83 * Redistribution and use in source and binary forms, with or without 84 * modification, are permitted provided that the following conditions 85 * are met: 86 * 1. Redistributions of source code must retain the above copyright 87 * notice, this list of conditions and the following disclaimer. 88 * 2. Redistributions in binary form must reproduce the above copyright 89 * notice, this list of conditions and the following disclaimer in the 90 * documentation and/or other materials provided with the distribution. 91 * 3. All advertising materials mentioning features or use of this software 92 * must display the following acknowledgement: 93 * This product includes software developed by the University of 94 * California, Berkeley and its contributors. 95 * 4. Neither the name of the University nor the names of its contributors 96 * may be used to endorse or promote products derived from this software 97 * without specific prior written permission. 98 * 99 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 100 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 101 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 102 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 103 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 104 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 105 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 106 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 107 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 108 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 109 * SUCH DAMAGE. 110 * 111 * from: Utah $Hdr: vn.c 1.13 94/04/02$ 112 * 113 * @(#)vn.c 8.9 (Berkeley) 5/14/95 114 */ 115 116 /* 117 * Vnode disk driver. 118 * 119 * Block/character interface to a vnode. Allows one to treat a file 120 * as a disk (e.g. build a filesystem in it, mount it, etc.). 121 * 122 * NOTE 1: This uses the VOP_BMAP/VOP_STRATEGY interface to the vnode 123 * instead of a simple VOP_RDWR. We do this to avoid distorting the 124 * local buffer cache. 125 * 126 * NOTE 2: There is a security issue involved with this driver. 127 * Once mounted all access to the contents of the "mapped" file via 128 * the special file is controlled by the permissions on the special 129 * file, the protection of the mapped file is ignored (effectively, 130 * by using root credentials in all transactions). 131 * 132 * NOTE 3: Doesn't interact with leases, should it? 133 */ 134 135 #include <sys/cdefs.h> 136 __KERNEL_RCSID(0, "$NetBSD: vnd.c,v 1.118 2005/07/25 13:25:08 drochner Exp $"); 137 138 #if defined(_KERNEL_OPT) 139 #include "fs_nfs.h" 140 #include "opt_vnd.h" 141 #endif 142 143 #include <sys/param.h> 144 #include <sys/systm.h> 145 #include <sys/namei.h> 146 #include <sys/proc.h> 147 #include <sys/kthread.h> 148 #include <sys/errno.h> 149 #include <sys/buf.h> 150 #include <sys/bufq.h> 151 #include <sys/malloc.h> 152 #include <sys/ioctl.h> 153 #include <sys/disklabel.h> 154 #include <sys/device.h> 155 #include <sys/disk.h> 156 #include <sys/stat.h> 157 #include <sys/mount.h> 158 #include <sys/vnode.h> 159 #include <sys/file.h> 160 #include <sys/uio.h> 161 #include <sys/conf.h> 162 #include <net/zlib.h> 163 164 #include <miscfs/specfs/specdev.h> 165 166 #include <dev/vndvar.h> 167 168 #if defined(VNDDEBUG) && !defined(DEBUG) 169 #define DEBUG 170 #endif 171 172 #ifdef DEBUG 173 int dovndcluster = 1; 174 #define VDB_FOLLOW 0x01 175 #define VDB_INIT 0x02 176 #define VDB_IO 0x04 177 #define VDB_LABEL 0x08 178 int vnddebug = 0x00; 179 #endif 180 181 #define vndunit(x) DISKUNIT(x) 182 183 struct vndxfer { 184 struct buf *vx_bp; /* Pointer to parent buffer */ 185 int vx_error; 186 int vx_pending; /* # of pending aux buffers */ 187 int vx_flags; 188 #define VX_BUSY 1 189 }; 190 191 struct vndbuf { 192 struct buf vb_buf; 193 struct vndxfer *vb_xfer; 194 }; 195 196 #define VND_GETXFER(vnd) pool_get(&(vnd)->sc_vxpool, PR_WAITOK) 197 #define VND_PUTXFER(vnd, vx) pool_put(&(vnd)->sc_vxpool, (vx)) 198 199 #define VND_GETBUF(vnd) pool_get(&(vnd)->sc_vbpool, PR_WAITOK) 200 #define VND_PUTBUF(vnd, vb) pool_put(&(vnd)->sc_vbpool, (vb)) 201 202 struct vnd_softc *vnd_softc; 203 int numvnd = 0; 204 205 #define VNDLABELDEV(dev) \ 206 (MAKEDISKDEV(major((dev)), vndunit((dev)), RAW_PART)) 207 208 /* called by main() at boot time (XXX: and the LKM driver) */ 209 void vndattach(int); 210 int vnddetach(void); 211 212 static void vndclear(struct vnd_softc *, int); 213 static int vndsetcred(struct vnd_softc *, struct ucred *); 214 static void vndthrottle(struct vnd_softc *, struct vnode *); 215 static void vndiodone(struct buf *); 216 #if 0 217 static void vndshutdown(void); 218 #endif 219 220 static void vndgetdefaultlabel(struct vnd_softc *, struct disklabel *); 221 static void vndgetdisklabel(dev_t); 222 223 static int vndlock(struct vnd_softc *); 224 static void vndunlock(struct vnd_softc *); 225 #ifdef VND_COMPRESSION 226 static void compstrategy(struct buf *, off_t); 227 static void *vnd_alloc(void *, u_int, u_int); 228 static void vnd_free(void *, void *); 229 #endif /* VND_COMPRESSION */ 230 231 void vndthread(void *); 232 233 static dev_type_open(vndopen); 234 static dev_type_close(vndclose); 235 static dev_type_read(vndread); 236 static dev_type_write(vndwrite); 237 static dev_type_ioctl(vndioctl); 238 static dev_type_strategy(vndstrategy); 239 static dev_type_dump(vnddump); 240 static dev_type_size(vndsize); 241 242 const struct bdevsw vnd_bdevsw = { 243 vndopen, vndclose, vndstrategy, vndioctl, vnddump, vndsize, D_DISK 244 }; 245 246 const struct cdevsw vnd_cdevsw = { 247 vndopen, vndclose, vndread, vndwrite, vndioctl, 248 nostop, notty, nopoll, nommap, nokqfilter, D_DISK 249 }; 250 251 static int vndattached; 252 253 void 254 vndattach(int num) 255 { 256 int i; 257 char *mem; 258 259 if (vndattached) 260 return; 261 vndattached = 1; 262 if (num <= 0) 263 return; 264 i = num * sizeof(struct vnd_softc); 265 mem = malloc(i, M_DEVBUF, M_NOWAIT|M_ZERO); 266 if (mem == NULL) { 267 printf("WARNING: no memory for vnode disks\n"); 268 return; 269 } 270 vnd_softc = (struct vnd_softc *)mem; 271 numvnd = num; 272 273 for (i = 0; i < numvnd; i++) { 274 vnd_softc[i].sc_unit = i; 275 vnd_softc[i].sc_comp_offsets = NULL; 276 vnd_softc[i].sc_comp_buff = NULL; 277 vnd_softc[i].sc_comp_decombuf = NULL; 278 bufq_alloc(&vnd_softc[i].sc_tab, 279 BUFQ_DISKSORT|BUFQ_SORT_RAWBLOCK); 280 } 281 } 282 283 int 284 vnddetach(void) 285 { 286 int i; 287 288 /* First check we aren't in use. */ 289 for (i = 0; i < numvnd; i++) 290 if (vnd_softc[i].sc_flags & VNF_INITED) 291 return (EBUSY); 292 293 for (i = 0; i < numvnd; i++) 294 bufq_free(&vnd_softc[i].sc_tab); 295 296 free(vnd_softc, M_DEVBUF); 297 vndattached = 0; 298 299 return (0); 300 } 301 302 static int 303 vndopen(dev_t dev, int flags, int mode, struct proc *p) 304 { 305 int unit = vndunit(dev); 306 struct vnd_softc *sc; 307 int error = 0, part, pmask; 308 struct disklabel *lp; 309 310 #ifdef DEBUG 311 if (vnddebug & VDB_FOLLOW) 312 printf("vndopen(0x%x, 0x%x, 0x%x, %p)\n", dev, flags, mode, p); 313 #endif 314 if (unit >= numvnd) 315 return (ENXIO); 316 sc = &vnd_softc[unit]; 317 318 if ((error = vndlock(sc)) != 0) 319 return (error); 320 321 lp = sc->sc_dkdev.dk_label; 322 323 part = DISKPART(dev); 324 pmask = (1 << part); 325 326 /* 327 * If we're initialized, check to see if there are any other 328 * open partitions. If not, then it's safe to update the 329 * in-core disklabel. Only read the disklabel if it is 330 * not already valid. 331 */ 332 if ((sc->sc_flags & (VNF_INITED|VNF_VLABEL)) == VNF_INITED && 333 sc->sc_dkdev.dk_openmask == 0) 334 vndgetdisklabel(dev); 335 336 /* Check that the partitions exists. */ 337 if (part != RAW_PART) { 338 if (((sc->sc_flags & VNF_INITED) == 0) || 339 ((part >= lp->d_npartitions) || 340 (lp->d_partitions[part].p_fstype == FS_UNUSED))) { 341 error = ENXIO; 342 goto done; 343 } 344 } 345 346 /* Prevent our unit from being unconfigured while open. */ 347 switch (mode) { 348 case S_IFCHR: 349 sc->sc_dkdev.dk_copenmask |= pmask; 350 break; 351 352 case S_IFBLK: 353 sc->sc_dkdev.dk_bopenmask |= pmask; 354 break; 355 } 356 sc->sc_dkdev.dk_openmask = 357 sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask; 358 359 done: 360 vndunlock(sc); 361 return (error); 362 } 363 364 static int 365 vndclose(dev_t dev, int flags, int mode, struct proc *p) 366 { 367 int unit = vndunit(dev); 368 struct vnd_softc *sc; 369 int error = 0, part; 370 371 #ifdef DEBUG 372 if (vnddebug & VDB_FOLLOW) 373 printf("vndclose(0x%x, 0x%x, 0x%x, %p)\n", dev, flags, mode, p); 374 #endif 375 376 if (unit >= numvnd) 377 return (ENXIO); 378 sc = &vnd_softc[unit]; 379 380 if ((error = vndlock(sc)) != 0) 381 return (error); 382 383 part = DISKPART(dev); 384 385 /* ...that much closer to allowing unconfiguration... */ 386 switch (mode) { 387 case S_IFCHR: 388 sc->sc_dkdev.dk_copenmask &= ~(1 << part); 389 break; 390 391 case S_IFBLK: 392 sc->sc_dkdev.dk_bopenmask &= ~(1 << part); 393 break; 394 } 395 sc->sc_dkdev.dk_openmask = 396 sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask; 397 398 vndunlock(sc); 399 return (0); 400 } 401 402 /* 403 * Qeue the request, and wakeup the kernel thread to handle it. 404 */ 405 static void 406 vndstrategy(struct buf *bp) 407 { 408 int unit = vndunit(bp->b_dev); 409 struct vnd_softc *vnd = &vnd_softc[unit]; 410 struct disklabel *lp = vnd->sc_dkdev.dk_label; 411 int s = splbio(); 412 413 bp->b_resid = bp->b_bcount; 414 415 if ((vnd->sc_flags & VNF_INITED) == 0) { 416 bp->b_error = ENXIO; 417 bp->b_flags |= B_ERROR; 418 goto done; 419 } 420 421 /* 422 * The transfer must be a whole number of blocks. 423 */ 424 if ((bp->b_bcount % lp->d_secsize) != 0) { 425 bp->b_error = EINVAL; 426 bp->b_flags |= B_ERROR; 427 goto done; 428 } 429 430 /* 431 * check if we're read-only. 432 */ 433 if ((vnd->sc_flags & VNF_READONLY) && !(bp->b_flags & B_READ)) { 434 bp->b_error = EACCES; 435 bp->b_flags |= B_ERROR; 436 goto done; 437 } 438 439 /* 440 * Do bounds checking and adjust transfer. If there's an error, 441 * the bounds check will flag that for us. 442 */ 443 if (DISKPART(bp->b_dev) != RAW_PART) { 444 if (bounds_check_with_label(&vnd->sc_dkdev, 445 bp, vnd->sc_flags & (VNF_WLABEL|VNF_LABELLING)) <= 0) 446 goto done; 447 } 448 449 /* If it's a nil transfer, wake up the top half now. */ 450 if (bp->b_bcount == 0) 451 goto done; 452 #ifdef DEBUG 453 if (vnddebug & VDB_FOLLOW) 454 printf("vndstrategy(%p): unit %d\n", bp, unit); 455 #endif 456 BUFQ_PUT(&vnd->sc_tab, bp); 457 wakeup(&vnd->sc_tab); 458 splx(s); 459 return; 460 done: 461 biodone(bp); 462 splx(s); 463 } 464 465 void 466 vndthread(void *arg) 467 { 468 struct vnd_softc *vnd = arg; 469 struct buf *bp; 470 struct vndxfer *vnx; 471 struct mount *mp; 472 int s, bsize, resid; 473 off_t bn; 474 caddr_t addr; 475 int sz, flags, error; 476 struct disklabel *lp; 477 struct partition *pp; 478 479 s = splbio(); 480 vnd->sc_flags |= VNF_KTHREAD; 481 wakeup(&vnd->sc_kthread); 482 483 /* 484 * Dequeue requests, break them into bsize pieces and submit using 485 * VOP_BMAP/VOP_STRATEGY. 486 */ 487 while ((vnd->sc_flags & VNF_VUNCONF) == 0) { 488 bp = BUFQ_GET(&vnd->sc_tab); 489 if (bp == NULL) { 490 tsleep(&vnd->sc_tab, PRIBIO, "vndbp", 0); 491 continue; 492 }; 493 splx(s); 494 495 #ifdef DEBUG 496 if (vnddebug & VDB_FOLLOW) 497 printf("vndthread(%p\n", bp); 498 #endif 499 lp = vnd->sc_dkdev.dk_label; 500 bp->b_resid = bp->b_bcount; 501 502 /* 503 * Put the block number in terms of the logical blocksize 504 * of the "device". 505 */ 506 bn = bp->b_blkno / (lp->d_secsize / DEV_BSIZE); 507 508 /* 509 * Translate the partition-relative block number to an absolute. 510 */ 511 if (DISKPART(bp->b_dev) != RAW_PART) { 512 pp = &vnd->sc_dkdev.dk_label->d_partitions[ 513 DISKPART(bp->b_dev)]; 514 bn += pp->p_offset; 515 } 516 517 /* ...and convert to a byte offset within the file. */ 518 bn *= lp->d_secsize; 519 520 if (vnd->sc_vp->v_mount == NULL) { 521 bp->b_error = ENXIO; 522 bp->b_flags |= B_ERROR; 523 goto done; 524 } 525 #ifdef VND_COMPRESSION 526 /* handle a compressed read */ 527 if ((bp->b_flags & B_READ) && (vnd->sc_flags & VNF_COMP)) { 528 compstrategy(bp, bn); 529 goto done; 530 } 531 #endif /* VND_COMPRESSION */ 532 533 bsize = vnd->sc_vp->v_mount->mnt_stat.f_iosize; 534 addr = bp->b_data; 535 flags = (bp->b_flags & (B_READ|B_ASYNC)) | B_CALL; 536 537 /* 538 * Allocate a header for this transfer and link it to the 539 * buffer 540 */ 541 s = splbio(); 542 vnx = VND_GETXFER(vnd); 543 splx(s); 544 vnx->vx_flags = VX_BUSY; 545 vnx->vx_error = 0; 546 vnx->vx_pending = 0; 547 vnx->vx_bp = bp; 548 549 if ((flags & B_READ) == 0) 550 vn_start_write(vnd->sc_vp, &mp, V_WAIT); 551 552 /* 553 * Feed requests sequentially. 554 * We do it this way to keep from flooding NFS servers if we 555 * are connected to an NFS file. This places the burden on 556 * the client rather than the server. 557 */ 558 for (resid = bp->b_resid; resid; resid -= sz) { 559 struct vndbuf *nbp; 560 struct vnode *vp; 561 daddr_t nbn; 562 int off, nra; 563 564 nra = 0; 565 vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY | LK_CANRECURSE); 566 error = VOP_BMAP(vnd->sc_vp, bn / bsize, &vp, &nbn, &nra); 567 VOP_UNLOCK(vnd->sc_vp, 0); 568 569 if (error == 0 && (long)nbn == -1) 570 error = EIO; 571 572 /* 573 * If there was an error or a hole in the file...punt. 574 * Note that we may have to wait for any operations 575 * that we have already fired off before releasing 576 * the buffer. 577 * 578 * XXX we could deal with holes here but it would be 579 * a hassle (in the write case). 580 */ 581 if (error) { 582 s = splbio(); 583 vnx->vx_error = error; 584 goto out; 585 } 586 587 #ifdef DEBUG 588 if (!dovndcluster) 589 nra = 0; 590 #endif 591 592 if ((off = bn % bsize) != 0) 593 sz = bsize - off; 594 else 595 sz = (1 + nra) * bsize; 596 if (resid < sz) 597 sz = resid; 598 #ifdef DEBUG 599 if (vnddebug & VDB_IO) 600 printf("vndstrategy: vp %p/%p bn 0x%qx/0x%" PRIx64 601 " sz 0x%x\n", 602 vnd->sc_vp, vp, (long long)bn, nbn, sz); 603 #endif 604 605 s = splbio(); 606 while (vnd->sc_active >= vnd->sc_maxactive) { 607 tsleep(&vnd->sc_tab, PRIBIO, "vndac", 0); 608 } 609 vnd->sc_active++; 610 nbp = VND_GETBUF(vnd); 611 splx(s); 612 BUF_INIT(&nbp->vb_buf); 613 nbp->vb_buf.b_flags = flags; 614 nbp->vb_buf.b_bcount = sz; 615 nbp->vb_buf.b_bufsize = round_page((ulong)addr + sz) 616 - trunc_page((ulong) addr); 617 nbp->vb_buf.b_error = 0; 618 nbp->vb_buf.b_data = addr; 619 nbp->vb_buf.b_blkno = nbp->vb_buf.b_rawblkno = nbn + btodb(off); 620 nbp->vb_buf.b_proc = bp->b_proc; 621 nbp->vb_buf.b_iodone = vndiodone; 622 nbp->vb_buf.b_vp = vp; 623 624 nbp->vb_xfer = vnx; 625 626 BIO_COPYPRIO(&nbp->vb_buf, bp); 627 628 /* 629 * Just sort by block number 630 */ 631 s = splbio(); 632 if (vnx->vx_error != 0) { 633 VND_PUTBUF(vnd, nbp); 634 goto out; 635 } 636 vnx->vx_pending++; 637 #ifdef DEBUG 638 if (vnddebug & VDB_IO) 639 printf("vndstart(%ld): bp %p vp %p blkno " 640 "0x%" PRIx64 " flags %x addr %p cnt 0x%x\n", 641 (long) (vnd-vnd_softc), &nbp->vb_buf, 642 nbp->vb_buf.b_vp, nbp->vb_buf.b_blkno, 643 nbp->vb_buf.b_flags, nbp->vb_buf.b_data, 644 nbp->vb_buf.b_bcount); 645 #endif 646 647 /* Instrumentation. */ 648 disk_busy(&vnd->sc_dkdev); 649 650 if ((nbp->vb_buf.b_flags & B_READ) == 0) 651 vp->v_numoutput++; 652 VOP_STRATEGY(vp, &nbp->vb_buf); 653 654 splx(s); 655 bn += sz; 656 addr += sz; 657 } 658 659 s = splbio(); 660 661 out: /* Arrive here at splbio */ 662 if ((flags & B_READ) == 0) 663 vn_finished_write(mp, 0); 664 vnx->vx_flags &= ~VX_BUSY; 665 if (vnx->vx_pending == 0) { 666 if (vnx->vx_error != 0) { 667 bp->b_error = vnx->vx_error; 668 bp->b_flags |= B_ERROR; 669 } 670 VND_PUTXFER(vnd, vnx); 671 biodone(bp); 672 } 673 continue; 674 done: 675 biodone(bp); 676 s = splbio(); 677 } 678 679 vnd->sc_flags &= (~VNF_KTHREAD | VNF_VUNCONF); 680 wakeup(&vnd->sc_kthread); 681 splx(s); 682 kthread_exit(0); 683 } 684 685 686 static void 687 vndiodone(struct buf *bp) 688 { 689 struct vndbuf *vbp = (struct vndbuf *) bp; 690 struct vndxfer *vnx = (struct vndxfer *)vbp->vb_xfer; 691 struct buf *pbp = vnx->vx_bp; 692 struct vnd_softc *vnd = &vnd_softc[vndunit(pbp->b_dev)]; 693 int s, resid; 694 695 s = splbio(); 696 #ifdef DEBUG 697 if (vnddebug & VDB_IO) 698 printf("vndiodone(%ld): vbp %p vp %p blkno 0x%" PRIx64 699 " addr %p cnt 0x%x\n", 700 (long) (vnd-vnd_softc), vbp, vbp->vb_buf.b_vp, 701 vbp->vb_buf.b_blkno, vbp->vb_buf.b_data, 702 vbp->vb_buf.b_bcount); 703 #endif 704 705 resid = vbp->vb_buf.b_bcount - vbp->vb_buf.b_resid; 706 pbp->b_resid -= resid; 707 disk_unbusy(&vnd->sc_dkdev, resid, (pbp->b_flags & B_READ)); 708 vnx->vx_pending--; 709 710 if (vbp->vb_buf.b_error) { 711 #ifdef DEBUG 712 if (vnddebug & VDB_IO) 713 printf("vndiodone: vbp %p error %d\n", vbp, 714 vbp->vb_buf.b_error); 715 #endif 716 vnx->vx_error = vbp->vb_buf.b_error; 717 } 718 719 VND_PUTBUF(vnd, vbp); 720 721 /* 722 * Wrap up this transaction if it has run to completion or, in 723 * case of an error, when all auxiliary buffers have returned. 724 */ 725 if (vnx->vx_error != 0) { 726 pbp->b_flags |= B_ERROR; 727 pbp->b_error = vnx->vx_error; 728 if ((vnx->vx_flags & VX_BUSY) == 0 && vnx->vx_pending == 0) { 729 730 #ifdef DEBUG 731 if (vnddebug & VDB_IO) 732 printf("vndiodone: pbp %p iodone: error %d\n", 733 pbp, vnx->vx_error); 734 #endif 735 VND_PUTXFER(vnd, vnx); 736 biodone(pbp); 737 } 738 } else if (pbp->b_resid == 0) { 739 740 #ifdef DIAGNOSTIC 741 if (vnx->vx_pending != 0) 742 panic("vndiodone: vnx pending: %d", vnx->vx_pending); 743 #endif 744 745 if ((vnx->vx_flags & VX_BUSY) == 0) { 746 #ifdef DEBUG 747 if (vnddebug & VDB_IO) 748 printf("vndiodone: pbp %p iodone\n", pbp); 749 #endif 750 VND_PUTXFER(vnd, vnx); 751 biodone(pbp); 752 } 753 } 754 755 vnd->sc_active--; 756 wakeup(&vnd->sc_tab); 757 splx(s); 758 } 759 760 /* ARGSUSED */ 761 static int 762 vndread(dev_t dev, struct uio *uio, int flags) 763 { 764 int unit = vndunit(dev); 765 struct vnd_softc *sc; 766 767 #ifdef DEBUG 768 if (vnddebug & VDB_FOLLOW) 769 printf("vndread(0x%x, %p)\n", dev, uio); 770 #endif 771 772 if (unit >= numvnd) 773 return (ENXIO); 774 sc = &vnd_softc[unit]; 775 776 if ((sc->sc_flags & VNF_INITED) == 0) 777 return (ENXIO); 778 779 return (physio(vndstrategy, NULL, dev, B_READ, minphys, uio)); 780 } 781 782 /* ARGSUSED */ 783 static int 784 vndwrite(dev_t dev, struct uio *uio, int flags) 785 { 786 int unit = vndunit(dev); 787 struct vnd_softc *sc; 788 789 #ifdef DEBUG 790 if (vnddebug & VDB_FOLLOW) 791 printf("vndwrite(0x%x, %p)\n", dev, uio); 792 #endif 793 794 if (unit >= numvnd) 795 return (ENXIO); 796 sc = &vnd_softc[unit]; 797 798 if ((sc->sc_flags & VNF_INITED) == 0) 799 return (ENXIO); 800 801 return (physio(vndstrategy, NULL, dev, B_WRITE, minphys, uio)); 802 } 803 804 /* ARGSUSED */ 805 static int 806 vndioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p) 807 { 808 int unit = vndunit(dev); 809 struct vnd_softc *vnd; 810 struct vnd_ioctl *vio; 811 struct vattr vattr; 812 struct nameidata nd; 813 int error, part, pmask; 814 size_t geomsize; 815 int fflags; 816 #ifdef __HAVE_OLD_DISKLABEL 817 struct disklabel newlabel; 818 #endif 819 820 #ifdef DEBUG 821 if (vnddebug & VDB_FOLLOW) 822 printf("vndioctl(0x%x, 0x%lx, %p, 0x%x, %p): unit %d\n", 823 dev, cmd, data, flag, p, unit); 824 #endif 825 if (unit >= numvnd) 826 return (ENXIO); 827 828 vnd = &vnd_softc[unit]; 829 vio = (struct vnd_ioctl *)data; 830 831 /* Must be open for writes for these commands... */ 832 switch (cmd) { 833 case VNDIOCSET: 834 case VNDIOCCLR: 835 case DIOCSDINFO: 836 case DIOCWDINFO: 837 #ifdef __HAVE_OLD_DISKLABEL 838 case ODIOCSDINFO: 839 case ODIOCWDINFO: 840 #endif 841 case DIOCKLABEL: 842 case DIOCWLABEL: 843 if ((flag & FWRITE) == 0) 844 return (EBADF); 845 } 846 847 /* Must be initialized for these... */ 848 switch (cmd) { 849 case VNDIOCCLR: 850 case DIOCGDINFO: 851 case DIOCSDINFO: 852 case DIOCWDINFO: 853 case DIOCGPART: 854 case DIOCKLABEL: 855 case DIOCWLABEL: 856 case DIOCGDEFLABEL: 857 #ifdef __HAVE_OLD_DISKLABEL 858 case ODIOCGDINFO: 859 case ODIOCSDINFO: 860 case ODIOCWDINFO: 861 case ODIOCGDEFLABEL: 862 #endif 863 if ((vnd->sc_flags & VNF_INITED) == 0) 864 return (ENXIO); 865 } 866 867 switch (cmd) { 868 case VNDIOCSET: 869 if (vnd->sc_flags & VNF_INITED) 870 return (EBUSY); 871 872 if ((error = vndlock(vnd)) != 0) 873 return (error); 874 875 fflags = FREAD; 876 if ((vio->vnd_flags & VNDIOF_READONLY) == 0) 877 fflags |= FWRITE; 878 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, vio->vnd_file, p); 879 if ((error = vn_open(&nd, fflags, 0)) != 0) 880 goto unlock_and_exit; 881 error = VOP_GETATTR(nd.ni_vp, &vattr, p->p_ucred, p); 882 if (!error && nd.ni_vp->v_type != VREG) 883 error = EOPNOTSUPP; 884 if (error) { 885 VOP_UNLOCK(nd.ni_vp, 0); 886 goto close_and_exit; 887 } 888 889 /* If using a compressed file, initialize its info */ 890 /* (or abort with an error if kernel has no compression) */ 891 if (vio->vnd_flags & VNF_COMP) { 892 #ifdef VND_COMPRESSION 893 struct vnd_comp_header *ch; 894 int i; 895 u_int32_t comp_size; 896 u_int32_t comp_maxsize; 897 898 /* allocate space for compresed file header */ 899 ch = malloc(sizeof(struct vnd_comp_header), 900 M_TEMP, M_WAITOK); 901 902 /* read compressed file header */ 903 error = vn_rdwr(UIO_READ, nd.ni_vp, (caddr_t)ch, 904 sizeof(struct vnd_comp_header), 0, UIO_SYSSPACE, 905 IO_UNIT|IO_NODELOCKED, p->p_ucred, NULL, NULL); 906 if(error) { 907 free(ch, M_TEMP); 908 VOP_UNLOCK(nd.ni_vp, 0); 909 goto close_and_exit; 910 } 911 912 /* save some header info */ 913 vnd->sc_comp_blksz = ntohl(ch->block_size); 914 /* note last offset is the file byte size */ 915 vnd->sc_comp_numoffs = ntohl(ch->num_blocks)+1; 916 free(ch, M_TEMP); 917 if(vnd->sc_comp_blksz % DEV_BSIZE !=0) { 918 VOP_UNLOCK(nd.ni_vp, 0); 919 error = EINVAL; 920 goto close_and_exit; 921 } 922 if(sizeof(struct vnd_comp_header) + 923 sizeof(u_int64_t) * vnd->sc_comp_numoffs > 924 vattr.va_size) { 925 VOP_UNLOCK(nd.ni_vp, 0); 926 error = EINVAL; 927 goto close_and_exit; 928 } 929 930 /* set decompressed file size */ 931 vattr.va_size = 932 (vnd->sc_comp_numoffs - 1) * vnd->sc_comp_blksz; 933 934 /* allocate space for all the compressed offsets */ 935 vnd->sc_comp_offsets = 936 malloc(sizeof(u_int64_t) * vnd->sc_comp_numoffs, 937 M_DEVBUF, M_WAITOK); 938 939 /* read in the offsets */ 940 error = vn_rdwr(UIO_READ, nd.ni_vp, 941 (caddr_t)vnd->sc_comp_offsets, 942 sizeof(u_int64_t) * vnd->sc_comp_numoffs, 943 sizeof(struct vnd_comp_header), UIO_SYSSPACE, 944 IO_UNIT|IO_NODELOCKED, p->p_ucred, NULL, NULL); 945 if(error) { 946 VOP_UNLOCK(nd.ni_vp, 0); 947 goto close_and_exit; 948 } 949 /* 950 * find largest block size (used for allocation limit). 951 * Also convert offset to native byte order. 952 */ 953 comp_maxsize = 0; 954 for (i = 0; i < vnd->sc_comp_numoffs - 1; i++) { 955 vnd->sc_comp_offsets[i] = 956 be64toh(vnd->sc_comp_offsets[i]); 957 comp_size = be64toh(vnd->sc_comp_offsets[i + 1]) 958 - vnd->sc_comp_offsets[i]; 959 if (comp_size > comp_maxsize) 960 comp_maxsize = comp_size; 961 } 962 vnd->sc_comp_offsets[vnd->sc_comp_numoffs - 1] = 963 be64toh(vnd->sc_comp_offsets[vnd->sc_comp_numoffs - 1]); 964 965 /* create compressed data buffer */ 966 vnd->sc_comp_buff = malloc(comp_maxsize, 967 M_DEVBUF, M_WAITOK); 968 969 /* create decompressed buffer */ 970 vnd->sc_comp_decombuf = malloc(vnd->sc_comp_blksz, 971 M_DEVBUF, M_WAITOK); 972 vnd->sc_comp_buffblk = -1; 973 974 /* Initialize decompress stream */ 975 bzero(&vnd->sc_comp_stream, sizeof(z_stream)); 976 vnd->sc_comp_stream.zalloc = vnd_alloc; 977 vnd->sc_comp_stream.zfree = vnd_free; 978 error = inflateInit2(&vnd->sc_comp_stream, MAX_WBITS); 979 if(error) { 980 if(vnd->sc_comp_stream.msg) 981 printf("vnd%d: compressed file, %s\n", 982 unit, vnd->sc_comp_stream.msg); 983 VOP_UNLOCK(nd.ni_vp, 0); 984 error = EINVAL; 985 goto close_and_exit; 986 } 987 988 vnd->sc_flags |= VNF_COMP | VNF_READONLY; 989 #else /* !VND_COMPRESSION */ 990 error = EOPNOTSUPP; 991 goto close_and_exit; 992 #endif /* VND_COMPRESSION */ 993 } 994 995 VOP_UNLOCK(nd.ni_vp, 0); 996 vnd->sc_vp = nd.ni_vp; 997 vnd->sc_size = btodb(vattr.va_size); /* note truncation */ 998 999 /* 1000 * Use pseudo-geometry specified. If none was provided, 1001 * use "standard" Adaptec fictitious geometry. 1002 */ 1003 if (vio->vnd_flags & VNDIOF_HASGEOM) { 1004 1005 memcpy(&vnd->sc_geom, &vio->vnd_geom, 1006 sizeof(vio->vnd_geom)); 1007 1008 /* 1009 * Sanity-check the sector size. 1010 * XXX Don't allow secsize < DEV_BSIZE. Should 1011 * XXX we? 1012 */ 1013 if (vnd->sc_geom.vng_secsize < DEV_BSIZE || 1014 (vnd->sc_geom.vng_secsize % DEV_BSIZE) != 0 || 1015 vnd->sc_geom.vng_ncylinders == 0 || 1016 (vnd->sc_geom.vng_ntracks * 1017 vnd->sc_geom.vng_nsectors) == 0) { 1018 error = EINVAL; 1019 goto close_and_exit; 1020 } 1021 1022 /* 1023 * Compute the size (in DEV_BSIZE blocks) specified 1024 * by the geometry. 1025 */ 1026 geomsize = (vnd->sc_geom.vng_nsectors * 1027 vnd->sc_geom.vng_ntracks * 1028 vnd->sc_geom.vng_ncylinders) * 1029 (vnd->sc_geom.vng_secsize / DEV_BSIZE); 1030 1031 /* 1032 * Sanity-check the size against the specified 1033 * geometry. 1034 */ 1035 if (vnd->sc_size < geomsize) { 1036 error = EINVAL; 1037 goto close_and_exit; 1038 } 1039 } else if (vnd->sc_size >= (32 * 64)) { 1040 /* 1041 * Size must be at least 2048 DEV_BSIZE blocks 1042 * (1M) in order to use this geometry. 1043 */ 1044 vnd->sc_geom.vng_secsize = DEV_BSIZE; 1045 vnd->sc_geom.vng_nsectors = 32; 1046 vnd->sc_geom.vng_ntracks = 64; 1047 vnd->sc_geom.vng_ncylinders = vnd->sc_size / (64 * 32); 1048 } else { 1049 vnd->sc_geom.vng_secsize = DEV_BSIZE; 1050 vnd->sc_geom.vng_nsectors = 1; 1051 vnd->sc_geom.vng_ntracks = 1; 1052 vnd->sc_geom.vng_ncylinders = vnd->sc_size; 1053 } 1054 1055 if (vio->vnd_flags & VNDIOF_READONLY) { 1056 vnd->sc_flags |= VNF_READONLY; 1057 } 1058 1059 if ((error = vndsetcred(vnd, p->p_ucred)) != 0) 1060 goto close_and_exit; 1061 1062 memset(vnd->sc_xname, 0, sizeof(vnd->sc_xname)); /* XXX */ 1063 snprintf(vnd->sc_xname, sizeof(vnd->sc_xname), "vnd%d", unit); 1064 1065 1066 vndthrottle(vnd, vnd->sc_vp); 1067 vio->vnd_size = dbtob(vnd->sc_size); 1068 vnd->sc_flags |= VNF_INITED; 1069 1070 /* create the kernel thread, wait for it to be up */ 1071 error = kthread_create1(vndthread, vnd, &vnd->sc_kthread, 1072 vnd->sc_xname); 1073 if (error) 1074 goto close_and_exit; 1075 while ((vnd->sc_flags & VNF_KTHREAD) == 0) { 1076 tsleep(&vnd->sc_kthread, PRIBIO, "vndthr", 0); 1077 } 1078 #ifdef DEBUG 1079 if (vnddebug & VDB_INIT) 1080 printf("vndioctl: SET vp %p size 0x%lx %d/%d/%d/%d\n", 1081 vnd->sc_vp, (unsigned long) vnd->sc_size, 1082 vnd->sc_geom.vng_secsize, 1083 vnd->sc_geom.vng_nsectors, 1084 vnd->sc_geom.vng_ntracks, 1085 vnd->sc_geom.vng_ncylinders); 1086 #endif 1087 1088 /* Attach the disk. */ 1089 vnd->sc_dkdev.dk_name = vnd->sc_xname; 1090 disk_attach(&vnd->sc_dkdev); 1091 1092 /* Initialize the xfer and buffer pools. */ 1093 pool_init(&vnd->sc_vxpool, sizeof(struct vndxfer), 0, 1094 0, 0, "vndxpl", NULL); 1095 pool_init(&vnd->sc_vbpool, sizeof(struct vndbuf), 0, 1096 0, 0, "vndbpl", NULL); 1097 1098 /* Try and read the disklabel. */ 1099 vndgetdisklabel(dev); 1100 1101 vndunlock(vnd); 1102 1103 break; 1104 1105 close_and_exit: 1106 (void) vn_close(nd.ni_vp, fflags, p->p_ucred, p); 1107 unlock_and_exit: 1108 #ifdef VND_COMPRESSION 1109 /* free any allocated memory (for compressed file) */ 1110 if(vnd->sc_comp_offsets) { 1111 free(vnd->sc_comp_offsets, M_DEVBUF); 1112 vnd->sc_comp_offsets = NULL; 1113 } 1114 if(vnd->sc_comp_buff) { 1115 free(vnd->sc_comp_buff, M_DEVBUF); 1116 vnd->sc_comp_buff = NULL; 1117 } 1118 if(vnd->sc_comp_decombuf) { 1119 free(vnd->sc_comp_decombuf, M_DEVBUF); 1120 vnd->sc_comp_decombuf = NULL; 1121 } 1122 #endif /* VND_COMPRESSION */ 1123 vndunlock(vnd); 1124 return (error); 1125 1126 case VNDIOCCLR: 1127 if ((error = vndlock(vnd)) != 0) 1128 return (error); 1129 1130 /* 1131 * Don't unconfigure if any other partitions are open 1132 * or if both the character and block flavors of this 1133 * partition are open. 1134 */ 1135 part = DISKPART(dev); 1136 pmask = (1 << part); 1137 if (((vnd->sc_dkdev.dk_openmask & ~pmask) || 1138 ((vnd->sc_dkdev.dk_bopenmask & pmask) && 1139 (vnd->sc_dkdev.dk_copenmask & pmask))) && 1140 !(vio->vnd_flags & VNDIOF_FORCE)) { 1141 vndunlock(vnd); 1142 return (EBUSY); 1143 } 1144 1145 /* 1146 * XXX vndclear() might call vndclose() implicitely; 1147 * release lock to avoid recursion 1148 */ 1149 vndunlock(vnd); 1150 vndclear(vnd, minor(dev)); 1151 #ifdef DEBUG 1152 if (vnddebug & VDB_INIT) 1153 printf("vndioctl: CLRed\n"); 1154 #endif 1155 1156 /* Destroy the xfer and buffer pools. */ 1157 pool_destroy(&vnd->sc_vxpool); 1158 pool_destroy(&vnd->sc_vbpool); 1159 1160 /* Detatch the disk. */ 1161 disk_detach(&vnd->sc_dkdev); 1162 1163 break; 1164 1165 case VNDIOCGET: { 1166 struct vnd_user *vnu; 1167 struct vattr va; 1168 1169 vnu = (struct vnd_user *)data; 1170 1171 if (vnu->vnu_unit == -1) 1172 vnu->vnu_unit = unit; 1173 if (vnu->vnu_unit >= numvnd) 1174 return (ENXIO); 1175 if (vnu->vnu_unit < 0) 1176 return (EINVAL); 1177 1178 vnd = &vnd_softc[vnu->vnu_unit]; 1179 1180 if (vnd->sc_flags & VNF_INITED) { 1181 error = VOP_GETATTR(vnd->sc_vp, &va, p->p_ucred, p); 1182 if (error) 1183 return (error); 1184 vnu->vnu_dev = va.va_fsid; 1185 vnu->vnu_ino = va.va_fileid; 1186 } 1187 else { 1188 /* unused is not an error */ 1189 vnu->vnu_dev = 0; 1190 vnu->vnu_ino = 0; 1191 } 1192 1193 break; 1194 } 1195 1196 case DIOCGDINFO: 1197 *(struct disklabel *)data = *(vnd->sc_dkdev.dk_label); 1198 break; 1199 1200 #ifdef __HAVE_OLD_DISKLABEL 1201 case ODIOCGDINFO: 1202 newlabel = *(vnd->sc_dkdev.dk_label); 1203 if (newlabel.d_npartitions > OLDMAXPARTITIONS) 1204 return ENOTTY; 1205 memcpy(data, &newlabel, sizeof (struct olddisklabel)); 1206 break; 1207 #endif 1208 1209 case DIOCGPART: 1210 ((struct partinfo *)data)->disklab = vnd->sc_dkdev.dk_label; 1211 ((struct partinfo *)data)->part = 1212 &vnd->sc_dkdev.dk_label->d_partitions[DISKPART(dev)]; 1213 break; 1214 1215 case DIOCWDINFO: 1216 case DIOCSDINFO: 1217 #ifdef __HAVE_OLD_DISKLABEL 1218 case ODIOCWDINFO: 1219 case ODIOCSDINFO: 1220 #endif 1221 { 1222 struct disklabel *lp; 1223 1224 if ((error = vndlock(vnd)) != 0) 1225 return (error); 1226 1227 vnd->sc_flags |= VNF_LABELLING; 1228 1229 #ifdef __HAVE_OLD_DISKLABEL 1230 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) { 1231 memset(&newlabel, 0, sizeof newlabel); 1232 memcpy(&newlabel, data, sizeof (struct olddisklabel)); 1233 lp = &newlabel; 1234 } else 1235 #endif 1236 lp = (struct disklabel *)data; 1237 1238 error = setdisklabel(vnd->sc_dkdev.dk_label, 1239 lp, 0, vnd->sc_dkdev.dk_cpulabel); 1240 if (error == 0) { 1241 if (cmd == DIOCWDINFO 1242 #ifdef __HAVE_OLD_DISKLABEL 1243 || cmd == ODIOCWDINFO 1244 #endif 1245 ) 1246 error = writedisklabel(VNDLABELDEV(dev), 1247 vndstrategy, vnd->sc_dkdev.dk_label, 1248 vnd->sc_dkdev.dk_cpulabel); 1249 } 1250 1251 vnd->sc_flags &= ~VNF_LABELLING; 1252 1253 vndunlock(vnd); 1254 1255 if (error) 1256 return (error); 1257 break; 1258 } 1259 1260 case DIOCKLABEL: 1261 if (*(int *)data != 0) 1262 vnd->sc_flags |= VNF_KLABEL; 1263 else 1264 vnd->sc_flags &= ~VNF_KLABEL; 1265 break; 1266 1267 case DIOCWLABEL: 1268 if (*(int *)data != 0) 1269 vnd->sc_flags |= VNF_WLABEL; 1270 else 1271 vnd->sc_flags &= ~VNF_WLABEL; 1272 break; 1273 1274 case DIOCGDEFLABEL: 1275 vndgetdefaultlabel(vnd, (struct disklabel *)data); 1276 break; 1277 1278 #ifdef __HAVE_OLD_DISKLABEL 1279 case ODIOCGDEFLABEL: 1280 vndgetdefaultlabel(vnd, &newlabel); 1281 if (newlabel.d_npartitions > OLDMAXPARTITIONS) 1282 return ENOTTY; 1283 memcpy(data, &newlabel, sizeof (struct olddisklabel)); 1284 break; 1285 #endif 1286 1287 default: 1288 return (ENOTTY); 1289 } 1290 1291 return (0); 1292 } 1293 1294 /* 1295 * Duplicate the current processes' credentials. Since we are called only 1296 * as the result of a SET ioctl and only root can do that, any future access 1297 * to this "disk" is essentially as root. Note that credentials may change 1298 * if some other uid can write directly to the mapped file (NFS). 1299 */ 1300 static int 1301 vndsetcred(struct vnd_softc *vnd, struct ucred *cred) 1302 { 1303 struct uio auio; 1304 struct iovec aiov; 1305 char *tmpbuf; 1306 int error; 1307 1308 vnd->sc_cred = crdup(cred); 1309 tmpbuf = malloc(DEV_BSIZE, M_TEMP, M_WAITOK); 1310 1311 /* XXX: Horrible kludge to establish credentials for NFS */ 1312 aiov.iov_base = tmpbuf; 1313 aiov.iov_len = min(DEV_BSIZE, dbtob(vnd->sc_size)); 1314 auio.uio_iov = &aiov; 1315 auio.uio_iovcnt = 1; 1316 auio.uio_offset = 0; 1317 auio.uio_rw = UIO_READ; 1318 auio.uio_segflg = UIO_SYSSPACE; 1319 auio.uio_resid = aiov.iov_len; 1320 vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY); 1321 error = VOP_READ(vnd->sc_vp, &auio, 0, vnd->sc_cred); 1322 if (error == 0) { 1323 /* 1324 * Because vnd does all IO directly through the vnode 1325 * we need to flush (at least) the buffer from the above 1326 * VOP_READ from the buffer cache to prevent cache 1327 * incoherencies. Also, be careful to write dirty 1328 * buffers back to stable storage. 1329 */ 1330 error = vinvalbuf(vnd->sc_vp, V_SAVE, vnd->sc_cred, 1331 curproc, 0, 0); 1332 } 1333 VOP_UNLOCK(vnd->sc_vp, 0); 1334 1335 free(tmpbuf, M_TEMP); 1336 return (error); 1337 } 1338 1339 /* 1340 * Set maxactive based on FS type 1341 */ 1342 static void 1343 vndthrottle(struct vnd_softc *vnd, struct vnode *vp) 1344 { 1345 #ifdef NFS 1346 extern int (**nfsv2_vnodeop_p)(void *); 1347 1348 if (vp->v_op == nfsv2_vnodeop_p) 1349 vnd->sc_maxactive = 2; 1350 else 1351 #endif 1352 vnd->sc_maxactive = 8; 1353 1354 if (vnd->sc_maxactive < 1) 1355 vnd->sc_maxactive = 1; 1356 } 1357 1358 #if 0 1359 static void 1360 vndshutdown(void) 1361 { 1362 struct vnd_softc *vnd; 1363 1364 for (vnd = &vnd_softc[0]; vnd < &vnd_softc[numvnd]; vnd++) 1365 if (vnd->sc_flags & VNF_INITED) 1366 vndclear(vnd); 1367 } 1368 #endif 1369 1370 static void 1371 vndclear(struct vnd_softc *vnd, int myminor) 1372 { 1373 struct vnode *vp = vnd->sc_vp; 1374 struct proc *p = curproc; /* XXX */ 1375 int fflags = FREAD; 1376 int bmaj, cmaj, i, mn; 1377 int s; 1378 1379 #ifdef DEBUG 1380 if (vnddebug & VDB_FOLLOW) 1381 printf("vndclear(%p): vp %p\n", vnd, vp); 1382 #endif 1383 /* locate the major number */ 1384 bmaj = bdevsw_lookup_major(&vnd_bdevsw); 1385 cmaj = cdevsw_lookup_major(&vnd_cdevsw); 1386 1387 /* Nuke the vnodes for any open instances */ 1388 for (i = 0; i < MAXPARTITIONS; i++) { 1389 mn = DISKMINOR(vnd->sc_unit, i); 1390 vdevgone(bmaj, mn, mn, VBLK); 1391 if (mn != myminor) /* XXX avoid to kill own vnode */ 1392 vdevgone(cmaj, mn, mn, VCHR); 1393 } 1394 1395 if ((vnd->sc_flags & VNF_READONLY) == 0) 1396 fflags |= FWRITE; 1397 1398 s = splbio(); 1399 bufq_drain(&vnd->sc_tab); 1400 splx(s); 1401 1402 vnd->sc_flags |= VNF_VUNCONF; 1403 wakeup(&vnd->sc_tab); 1404 while (vnd->sc_flags & VNF_KTHREAD) 1405 tsleep(&vnd->sc_kthread, PRIBIO, "vnthr", 0); 1406 1407 #ifdef VND_COMPRESSION 1408 /* free the compressed file buffers */ 1409 if(vnd->sc_flags & VNF_COMP) { 1410 if(vnd->sc_comp_offsets) { 1411 free(vnd->sc_comp_offsets, M_DEVBUF); 1412 vnd->sc_comp_offsets = NULL; 1413 } 1414 if(vnd->sc_comp_buff) { 1415 free(vnd->sc_comp_buff, M_DEVBUF); 1416 vnd->sc_comp_buff = NULL; 1417 } 1418 if(vnd->sc_comp_decombuf) { 1419 free(vnd->sc_comp_decombuf, M_DEVBUF); 1420 vnd->sc_comp_decombuf = NULL; 1421 } 1422 } 1423 #endif /* VND_COMPRESSION */ 1424 vnd->sc_flags &= 1425 ~(VNF_INITED | VNF_READONLY | VNF_VLABEL 1426 | VNF_VUNCONF | VNF_COMP); 1427 if (vp == (struct vnode *)0) 1428 panic("vndclear: null vp"); 1429 (void) vn_close(vp, fflags, vnd->sc_cred, p); 1430 crfree(vnd->sc_cred); 1431 vnd->sc_vp = (struct vnode *)0; 1432 vnd->sc_cred = (struct ucred *)0; 1433 vnd->sc_size = 0; 1434 } 1435 1436 static int 1437 vndsize(dev_t dev) 1438 { 1439 struct vnd_softc *sc; 1440 struct disklabel *lp; 1441 int part, unit, omask; 1442 int size; 1443 1444 unit = vndunit(dev); 1445 if (unit >= numvnd) 1446 return (-1); 1447 sc = &vnd_softc[unit]; 1448 1449 if ((sc->sc_flags & VNF_INITED) == 0) 1450 return (-1); 1451 1452 part = DISKPART(dev); 1453 omask = sc->sc_dkdev.dk_openmask & (1 << part); 1454 lp = sc->sc_dkdev.dk_label; 1455 1456 if (omask == 0 && vndopen(dev, 0, S_IFBLK, curproc)) 1457 return (-1); 1458 1459 if (lp->d_partitions[part].p_fstype != FS_SWAP) 1460 size = -1; 1461 else 1462 size = lp->d_partitions[part].p_size * 1463 (lp->d_secsize / DEV_BSIZE); 1464 1465 if (omask == 0 && vndclose(dev, 0, S_IFBLK, curproc)) 1466 return (-1); 1467 1468 return (size); 1469 } 1470 1471 static int 1472 vnddump(dev_t dev, daddr_t blkno, caddr_t va, size_t size) 1473 { 1474 1475 /* Not implemented. */ 1476 return ENXIO; 1477 } 1478 1479 static void 1480 vndgetdefaultlabel(struct vnd_softc *sc, struct disklabel *lp) 1481 { 1482 struct vndgeom *vng = &sc->sc_geom; 1483 struct partition *pp; 1484 1485 memset(lp, 0, sizeof(*lp)); 1486 1487 lp->d_secperunit = sc->sc_size / (vng->vng_secsize / DEV_BSIZE); 1488 lp->d_secsize = vng->vng_secsize; 1489 lp->d_nsectors = vng->vng_nsectors; 1490 lp->d_ntracks = vng->vng_ntracks; 1491 lp->d_ncylinders = vng->vng_ncylinders; 1492 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors; 1493 1494 strncpy(lp->d_typename, "vnd", sizeof(lp->d_typename)); 1495 lp->d_type = DTYPE_VND; 1496 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname)); 1497 lp->d_rpm = 3600; 1498 lp->d_interleave = 1; 1499 lp->d_flags = 0; 1500 1501 pp = &lp->d_partitions[RAW_PART]; 1502 pp->p_offset = 0; 1503 pp->p_size = lp->d_secperunit; 1504 pp->p_fstype = FS_UNUSED; 1505 lp->d_npartitions = RAW_PART + 1; 1506 1507 lp->d_magic = DISKMAGIC; 1508 lp->d_magic2 = DISKMAGIC; 1509 lp->d_checksum = dkcksum(lp); 1510 } 1511 1512 /* 1513 * Read the disklabel from a vnd. If one is not present, create a fake one. 1514 */ 1515 static void 1516 vndgetdisklabel(dev_t dev) 1517 { 1518 struct vnd_softc *sc = &vnd_softc[vndunit(dev)]; 1519 const char *errstring; 1520 struct disklabel *lp = sc->sc_dkdev.dk_label; 1521 struct cpu_disklabel *clp = sc->sc_dkdev.dk_cpulabel; 1522 int i; 1523 1524 memset(clp, 0, sizeof(*clp)); 1525 1526 vndgetdefaultlabel(sc, lp); 1527 1528 /* 1529 * Call the generic disklabel extraction routine. 1530 */ 1531 errstring = readdisklabel(VNDLABELDEV(dev), vndstrategy, lp, clp); 1532 if (errstring) { 1533 /* 1534 * Lack of disklabel is common, but we print the warning 1535 * anyway, since it might contain other useful information. 1536 */ 1537 printf("%s: %s\n", sc->sc_xname, errstring); 1538 1539 /* 1540 * For historical reasons, if there's no disklabel 1541 * present, all partitions must be FS_BSDFFS and 1542 * occupy the entire disk. 1543 */ 1544 for (i = 0; i < MAXPARTITIONS; i++) { 1545 /* 1546 * Don't wipe out port specific hack (such as 1547 * dos partition hack of i386 port). 1548 */ 1549 if (lp->d_partitions[i].p_size != 0) 1550 continue; 1551 1552 lp->d_partitions[i].p_size = lp->d_secperunit; 1553 lp->d_partitions[i].p_offset = 0; 1554 lp->d_partitions[i].p_fstype = FS_BSDFFS; 1555 } 1556 1557 strncpy(lp->d_packname, "default label", 1558 sizeof(lp->d_packname)); 1559 1560 lp->d_npartitions = MAXPARTITIONS; 1561 lp->d_checksum = dkcksum(lp); 1562 } 1563 1564 /* In-core label now valid. */ 1565 sc->sc_flags |= VNF_VLABEL; 1566 } 1567 1568 /* 1569 * Wait interruptibly for an exclusive lock. 1570 * 1571 * XXX 1572 * Several drivers do this; it should be abstracted and made MP-safe. 1573 */ 1574 static int 1575 vndlock(struct vnd_softc *sc) 1576 { 1577 int error; 1578 1579 while ((sc->sc_flags & VNF_LOCKED) != 0) { 1580 sc->sc_flags |= VNF_WANTED; 1581 if ((error = tsleep(sc, PRIBIO | PCATCH, "vndlck", 0)) != 0) 1582 return (error); 1583 } 1584 sc->sc_flags |= VNF_LOCKED; 1585 return (0); 1586 } 1587 1588 /* 1589 * Unlock and wake up any waiters. 1590 */ 1591 static void 1592 vndunlock(struct vnd_softc *sc) 1593 { 1594 1595 sc->sc_flags &= ~VNF_LOCKED; 1596 if ((sc->sc_flags & VNF_WANTED) != 0) { 1597 sc->sc_flags &= ~VNF_WANTED; 1598 wakeup(sc); 1599 } 1600 } 1601 1602 #ifdef VND_COMPRESSION 1603 /* compressed file read */ 1604 static void 1605 compstrategy(struct buf *bp, off_t bn) 1606 { 1607 int error; 1608 int unit = vndunit(bp->b_dev); 1609 struct vnd_softc *vnd = &vnd_softc[unit]; 1610 u_int32_t comp_block; 1611 struct uio auio; 1612 caddr_t addr; 1613 int s; 1614 1615 /* set up constants for data move */ 1616 auio.uio_rw = UIO_READ; 1617 auio.uio_segflg = bp->b_flags & B_PHYS ? UIO_USERSPACE : UIO_SYSSPACE; 1618 auio.uio_procp = bp->b_proc; 1619 1620 /* read, and transfer the data */ 1621 addr = bp->b_data; 1622 s = splbio(); 1623 while (bp->b_resid > 0) { 1624 unsigned length; 1625 size_t length_in_buffer; 1626 u_int32_t offset_in_buffer; 1627 struct iovec aiov; 1628 1629 /* calculate the compressed block number */ 1630 comp_block = bn / (off_t)vnd->sc_comp_blksz; 1631 1632 /* check for good block number */ 1633 if (comp_block >= vnd->sc_comp_numoffs) { 1634 bp->b_error = EINVAL; 1635 bp->b_flags |= B_ERROR; 1636 splx(s); 1637 return; 1638 } 1639 1640 /* read in the compressed block, if not in buffer */ 1641 if (comp_block != vnd->sc_comp_buffblk) { 1642 length = vnd->sc_comp_offsets[comp_block + 1] - 1643 vnd->sc_comp_offsets[comp_block]; 1644 vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY); 1645 error = vn_rdwr(UIO_READ, vnd->sc_vp, vnd->sc_comp_buff, 1646 length, vnd->sc_comp_offsets[comp_block], 1647 UIO_SYSSPACE, IO_UNIT, vnd->sc_cred, NULL, NULL); 1648 if (error) { 1649 bp->b_error = error; 1650 bp->b_flags |= B_ERROR; 1651 VOP_UNLOCK(vnd->sc_vp, 0); 1652 splx(s); 1653 return; 1654 } 1655 /* uncompress the buffer */ 1656 vnd->sc_comp_stream.next_in = vnd->sc_comp_buff; 1657 vnd->sc_comp_stream.avail_in = length; 1658 vnd->sc_comp_stream.next_out = vnd->sc_comp_decombuf; 1659 vnd->sc_comp_stream.avail_out = vnd->sc_comp_blksz; 1660 inflateReset(&vnd->sc_comp_stream); 1661 error = inflate(&vnd->sc_comp_stream, Z_FINISH); 1662 if (error != Z_STREAM_END) { 1663 if (vnd->sc_comp_stream.msg) 1664 printf("%s: compressed file, %s\n", 1665 vnd->sc_xname, 1666 vnd->sc_comp_stream.msg); 1667 bp->b_error = EBADMSG; 1668 bp->b_flags |= B_ERROR; 1669 VOP_UNLOCK(vnd->sc_vp, 0); 1670 splx(s); 1671 return; 1672 } 1673 vnd->sc_comp_buffblk = comp_block; 1674 VOP_UNLOCK(vnd->sc_vp, 0); 1675 } 1676 1677 /* transfer the usable uncompressed data */ 1678 offset_in_buffer = bn % (off_t)vnd->sc_comp_blksz; 1679 length_in_buffer = vnd->sc_comp_blksz - offset_in_buffer; 1680 if (length_in_buffer > bp->b_resid) 1681 length_in_buffer = bp->b_resid; 1682 auio.uio_iov = &aiov; 1683 auio.uio_iovcnt = 1; 1684 aiov.iov_base = addr; 1685 aiov.iov_len = length_in_buffer; 1686 auio.uio_resid = aiov.iov_len; 1687 auio.uio_offset = 0; 1688 error = uiomove(vnd->sc_comp_decombuf + offset_in_buffer, 1689 length_in_buffer, &auio); 1690 if (error) { 1691 bp->b_error = error; 1692 bp->b_flags |= B_ERROR; 1693 splx(s); 1694 return; 1695 } 1696 1697 bn += length_in_buffer; 1698 addr += length_in_buffer; 1699 bp->b_resid -= length_in_buffer; 1700 } 1701 splx(s); 1702 } 1703 1704 /* compression memory allocation routines */ 1705 static void * 1706 vnd_alloc(void *aux, u_int items, u_int siz) 1707 { 1708 return malloc(items * siz, M_TEMP, M_NOWAIT); 1709 } 1710 1711 static void 1712 vnd_free(void *aux, void *ptr) 1713 { 1714 free(ptr, M_TEMP); 1715 } 1716 #endif /* VND_COMPRESSION */ 1717