1 /* $OpenBSD: vnd.c,v 1.41 2003/10/17 23:05:39 tedu Exp $ */ 2 /* $NetBSD: vnd.c,v 1.26 1996/03/30 23:06:11 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1988 University of Utah. 6 * Copyright (c) 1990, 1993 7 * The Regents of the University of California. All rights reserved. 8 * 9 * This code is derived from software contributed to Berkeley by 10 * the Systems Programming Group of the University of Utah Computer 11 * Science Department. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * 37 * from: Utah $Hdr: vn.c 1.13 94/04/02$ 38 * 39 * @(#)vn.c 8.6 (Berkeley) 4/1/94 40 */ 41 42 /* 43 * Vnode disk driver. 44 * 45 * Block/character interface to a vnode. Allows one to treat a file 46 * as a disk (e.g. build a filesystem in it, mount it, etc.). 47 * 48 * NOTE 1: This uses either the VOP_BMAP/VOP_STRATEGY interface to the 49 * vnode or simple VOP_READ/VOP_WRITE. The former is suitable for swapping 50 * as it doesn't distort the local buffer cache. The latter is good for 51 * building disk images as it keeps the cache consistent after the block 52 * device is closed. 53 * 54 * NOTE 2: There is a security issue involved with this driver. 55 * Once mounted all access to the contents of the "mapped" file via 56 * the special file is controlled by the permissions on the special 57 * file, the protection of the mapped file is ignored (effectively, 58 * by using root credentials in all transactions). 59 * 60 * NOTE 3: Doesn't interact with leases, should it? 61 */ 62 63 #include <sys/param.h> 64 #include <sys/systm.h> 65 #include <sys/namei.h> 66 #include <sys/proc.h> 67 #include <sys/errno.h> 68 #include <sys/buf.h> 69 #include <sys/malloc.h> 70 #include <sys/ioctl.h> 71 #include <sys/disklabel.h> 72 #include <sys/device.h> 73 #include <sys/disk.h> 74 #include <sys/stat.h> 75 #include <sys/mount.h> 76 #include <sys/vnode.h> 77 #include <sys/file.h> 78 #include <sys/uio.h> 79 #include <sys/conf.h> 80 81 #include <crypto/blf.h> 82 83 #include <miscfs/specfs/specdev.h> 84 85 #include <dev/vndioctl.h> 86 87 #ifdef DEBUG 88 int dovndcluster = 1; 89 int vnddebug = 0x00; 90 #define VDB_FOLLOW 0x01 91 #define VDB_INIT 0x02 92 #define VDB_IO 0x04 93 #endif 94 95 #define b_cylin b_resid 96 97 /* 98 * vndunit is a bit weird. have to reconstitute the dev_t for 99 * DISKUNIT(), but with the minor masked off. 100 */ 101 #define vndunit(x) DISKUNIT(makedev(major(x), minor(x) & 0x7ff)) 102 #define vndsimple(x) (minor(x) & 0x800) 103 #define MAKEVNDDEV(maj, unit, part) MAKEDISKDEV(maj, unit, part) 104 105 #define VNDLABELDEV(dev) (MAKEVNDDEV(major(dev), vndunit(dev), RAW_PART)) 106 107 struct vndbuf { 108 struct buf vb_buf; 109 struct buf *vb_obp; 110 }; 111 112 #define getvndbuf() \ 113 ((struct vndbuf *)malloc(sizeof(struct vndbuf), M_DEVBUF, M_WAITOK)) 114 #define putvndbuf(vbp) \ 115 free((caddr_t)(vbp), M_DEVBUF) 116 117 struct vnd_softc { 118 struct device sc_dev; 119 struct disk sc_dk; 120 121 int sc_flags; /* flags */ 122 size_t sc_size; /* size of vnd in blocks */ 123 struct vnode *sc_vp; /* vnode */ 124 struct ucred *sc_cred; /* credentials */ 125 int sc_maxactive; /* max # of active requests */ 126 struct buf sc_tab; /* transfer queue */ 127 void *sc_keyctx; /* key context */ 128 }; 129 130 /* sc_flags */ 131 #define VNF_ALIVE 0x0001 132 #define VNF_INITED 0x0002 133 #define VNF_WANTED 0x0040 134 #define VNF_LOCKED 0x0080 135 #define VNF_LABELLING 0x0100 136 #define VNF_WLABEL 0x0200 137 #define VNF_HAVELABEL 0x0400 138 #define VNF_BUSY 0x0800 139 #define VNF_SIMPLE 0x1000 140 141 struct vnd_softc *vnd_softc; 142 int numvnd = 0; 143 144 struct dkdriver vnddkdriver = { vndstrategy }; 145 146 /* called by main() at boot time */ 147 void vndattach(int); 148 149 void vndclear(struct vnd_softc *); 150 void vndstart(struct vnd_softc *); 151 int vndsetcred(struct vnd_softc *, struct ucred *); 152 void vndthrottle(struct vnd_softc *, struct vnode *); 153 void vndiodone(struct buf *); 154 void vndshutdown(void); 155 void vndgetdisklabel(dev_t, struct vnd_softc *); 156 void vndencrypt(struct vnd_softc *, caddr_t, size_t, daddr_t, int); 157 158 int vndlock(struct vnd_softc *); 159 void vndunlock(struct vnd_softc *); 160 161 void 162 vndencrypt(vnd, addr, size, off, encrypt) 163 struct vnd_softc *vnd; 164 caddr_t addr; 165 size_t size; 166 daddr_t off; 167 int encrypt; 168 { 169 int i, bsize; 170 u_char iv[8]; 171 172 bsize = dbtob(1); 173 for (i = 0; i < size/bsize; i++) { 174 bzero(iv, sizeof(iv)); 175 bcopy((u_char *)&off, iv, sizeof(off)); 176 blf_ecb_encrypt(vnd->sc_keyctx, iv, sizeof(iv)); 177 if (encrypt) 178 blf_cbc_encrypt(vnd->sc_keyctx, iv, addr, bsize); 179 else 180 blf_cbc_decrypt(vnd->sc_keyctx, iv, addr, bsize); 181 182 addr += bsize; 183 off++; 184 } 185 } 186 187 void 188 vndattach(num) 189 int num; 190 { 191 char *mem; 192 u_long size; 193 194 if (num <= 0) 195 return; 196 size = num * sizeof(struct vnd_softc); 197 mem = malloc(size, M_DEVBUF, M_NOWAIT); 198 if (mem == NULL) { 199 printf("WARNING: no memory for vnode disks\n"); 200 return; 201 } 202 bzero(mem, size); 203 vnd_softc = (struct vnd_softc *)mem; 204 numvnd = num; 205 } 206 207 int 208 vndopen(dev, flags, mode, p) 209 dev_t dev; 210 int flags, mode; 211 struct proc *p; 212 { 213 int unit = vndunit(dev); 214 struct vnd_softc *sc; 215 int error = 0, part, pmask; 216 217 #ifdef DEBUG 218 if (vnddebug & VDB_FOLLOW) 219 printf("vndopen(%x, %x, %x, %p)\n", dev, flags, mode, p); 220 #endif 221 if (unit >= numvnd) 222 return (ENXIO); 223 sc = &vnd_softc[unit]; 224 225 if ((error = vndlock(sc)) != 0) 226 return (error); 227 228 if ((sc->sc_flags & VNF_INITED) && 229 (sc->sc_flags & VNF_HAVELABEL) == 0) { 230 sc->sc_flags |= VNF_HAVELABEL; 231 vndgetdisklabel(dev, sc); 232 } 233 234 part = DISKPART(dev); 235 pmask = 1 << part; 236 237 /* 238 * If any partition is open, all succeeding openings must be of the 239 * same type. 240 */ 241 if (sc->sc_dk.dk_openmask) { 242 if (((sc->sc_flags & VNF_SIMPLE) != 0) != 243 (vndsimple(dev) != 0)) { 244 error = EBUSY; 245 goto bad; 246 } 247 } else if (vndsimple(dev)) 248 sc->sc_flags |= VNF_SIMPLE; 249 else 250 sc->sc_flags &= ~VNF_SIMPLE; 251 252 /* Check that the partition exists. */ 253 if (part != RAW_PART && 254 ((sc->sc_flags & VNF_HAVELABEL) == 0 || 255 part >= sc->sc_dk.dk_label->d_npartitions || 256 sc->sc_dk.dk_label->d_partitions[part].p_fstype == FS_UNUSED)) { 257 error = ENXIO; 258 goto bad; 259 } 260 261 /* Prevent our unit from being unconfigured while open. */ 262 switch (mode) { 263 case S_IFCHR: 264 sc->sc_dk.dk_copenmask |= pmask; 265 break; 266 267 case S_IFBLK: 268 sc->sc_dk.dk_bopenmask |= pmask; 269 break; 270 } 271 sc->sc_dk.dk_openmask = 272 sc->sc_dk.dk_copenmask | sc->sc_dk.dk_bopenmask; 273 274 vndunlock(sc); 275 return (0); 276 bad: 277 vndunlock(sc); 278 return (error); 279 } 280 281 /* 282 * Load the label information on the named device 283 */ 284 void 285 vndgetdisklabel(dev, sc) 286 dev_t dev; 287 struct vnd_softc *sc; 288 { 289 struct disklabel *lp = sc->sc_dk.dk_label; 290 char *errstring; 291 292 bzero(lp, sizeof(struct disklabel)); 293 bzero(sc->sc_dk.dk_cpulabel, sizeof(struct cpu_disklabel)); 294 295 lp->d_secsize = 512; 296 lp->d_ntracks = 1; 297 lp->d_nsectors = 100; 298 lp->d_ncylinders = sc->sc_size / 100; 299 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors; 300 if (lp->d_secpercyl == 0) { 301 lp->d_secpercyl = 100; 302 /* as long as it's not 0 - readdisklabel divides by it (?) */ 303 } 304 305 strncpy(lp->d_typename, "vnd device", sizeof(lp->d_typename)); 306 lp->d_type = DTYPE_SCSI; 307 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname)); 308 lp->d_secperunit = sc->sc_size; 309 lp->d_rpm = 3600; 310 lp->d_interleave = 1; 311 lp->d_flags = 0; 312 313 lp->d_partitions[RAW_PART].p_offset = 0; 314 lp->d_partitions[RAW_PART].p_size = 315 lp->d_secperunit * (lp->d_secsize / DEV_BSIZE); 316 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED; 317 lp->d_npartitions = RAW_PART + 1; 318 319 lp->d_magic = DISKMAGIC; 320 lp->d_magic2 = DISKMAGIC; 321 lp->d_checksum = dkcksum(lp); 322 323 /* 324 * Call the generic disklabel extraction routine 325 */ 326 errstring = readdisklabel(VNDLABELDEV(dev), vndstrategy, lp, 327 sc->sc_dk.dk_cpulabel, 0); 328 if (errstring) { 329 /*printf("%s: %s\n", sc->sc_dev.dv_xname, errstring);*/ 330 return; 331 } 332 } 333 334 int 335 vndclose(dev, flags, mode, p) 336 dev_t dev; 337 int flags, mode; 338 struct proc *p; 339 { 340 int unit = vndunit(dev); 341 struct vnd_softc *sc; 342 int error = 0, part; 343 344 #ifdef DEBUG 345 if (vnddebug & VDB_FOLLOW) 346 printf("vndclose(%x, %x, %x, %p)\n", dev, flags, mode, p); 347 #endif 348 349 if (unit >= numvnd) 350 return (ENXIO); 351 sc = &vnd_softc[unit]; 352 353 if ((error = vndlock(sc)) != 0) 354 return (error); 355 356 part = DISKPART(dev); 357 358 /* ...that much closer to allowing unconfiguration... */ 359 switch (mode) { 360 case S_IFCHR: 361 sc->sc_dk.dk_copenmask &= ~(1 << part); 362 break; 363 364 case S_IFBLK: 365 sc->sc_dk.dk_bopenmask &= ~(1 << part); 366 break; 367 } 368 sc->sc_dk.dk_openmask = 369 sc->sc_dk.dk_copenmask | sc->sc_dk.dk_bopenmask; 370 371 vndunlock(sc); 372 return (0); 373 } 374 375 /* 376 * Two methods are used, the traditional buffercache bypassing and the 377 * newer, cache-coherent on unmount, one. 378 * 379 * Former method: 380 * Break the request into bsize pieces and submit using VOP_BMAP/VOP_STRATEGY. 381 * Note that this driver can only be used for swapping over NFS on the hp 382 * since nfs_strategy on the vax cannot handle u-areas and page tables. 383 * 384 * Latter method: 385 * Repack the buffer into an uio structure and use VOP_READ/VOP_WRITE to 386 * access the underlying file. Things are complicated by the fact that we 387 * might get recursively called due to buffer flushes. In those cases we 388 * queue one write. 389 */ 390 void 391 vndstrategy(bp) 392 struct buf *bp; 393 { 394 int unit = vndunit(bp->b_dev); 395 struct vnd_softc *vnd = &vnd_softc[unit]; 396 struct vndbuf *nbp; 397 int bsize; 398 off_t bn; 399 caddr_t addr; 400 size_t resid; 401 int sz, flags, error, s; 402 struct iovec aiov; 403 struct uio auio; 404 struct proc *p = curproc; 405 406 #ifdef DEBUG 407 if (vnddebug & VDB_FOLLOW) 408 printf("vndstrategy(%p): unit %d\n", bp, unit); 409 #endif 410 if ((vnd->sc_flags & VNF_INITED) == 0) { 411 bp->b_error = ENXIO; 412 bp->b_flags |= B_ERROR; 413 s = splbio(); 414 biodone(bp); 415 splx(s); 416 return; 417 } 418 419 bn = bp->b_blkno; 420 sz = howmany(bp->b_bcount, DEV_BSIZE); 421 bp->b_resid = bp->b_bcount; 422 if (bn < 0) { 423 bp->b_error = EINVAL; 424 bp->b_flags |= B_ERROR; 425 s = splbio(); 426 biodone(bp); 427 splx(s); 428 return; 429 } 430 if (DISKPART(bp->b_dev) != RAW_PART && 431 bounds_check_with_label(bp, vnd->sc_dk.dk_label, 432 vnd->sc_dk.dk_cpulabel, 1) <= 0) { 433 s = splbio(); 434 biodone(bp); 435 splx(s); 436 return; 437 } 438 439 /* No bypassing of buffer cache? */ 440 if (vndsimple(bp->b_dev)) { 441 /* 442 * In order to avoid "locking against myself" panics, we 443 * must be prepared to queue operations during another I/O 444 * operation. This situation comes up where a dirty cache 445 * buffer needs to be flushed in order to provide the current 446 * operation with a fresh buffer. 447 * 448 * XXX do we really need to protect stuff relating to this with 449 * splbio? 450 */ 451 if (vnd->sc_flags & VNF_BUSY) { 452 s = splbio(); 453 bp->b_actf = vnd->sc_tab.b_actf; 454 vnd->sc_tab.b_actf = bp; 455 vnd->sc_tab.b_active++; 456 splx(s); 457 return; 458 } 459 460 /* Loop until all queued requests are handled. */ 461 for (;;) { 462 int part = DISKPART(bp->b_dev); 463 int off = vnd->sc_dk.dk_label->d_partitions[part].p_offset; 464 465 aiov.iov_base = bp->b_data; 466 auio.uio_resid = aiov.iov_len = bp->b_bcount; 467 auio.uio_iov = &aiov; 468 auio.uio_iovcnt = 1; 469 auio.uio_offset = dbtob((off_t)(bp->b_blkno + off)); 470 auio.uio_segflg = UIO_SYSSPACE; 471 auio.uio_procp = NULL; 472 473 vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY, p); 474 vnd->sc_flags |= VNF_BUSY; 475 if (bp->b_flags & B_READ) { 476 auio.uio_rw = UIO_READ; 477 bp->b_error = VOP_READ(vnd->sc_vp, &auio, 0, 478 vnd->sc_cred); 479 if (vnd->sc_keyctx) 480 vndencrypt(vnd, bp->b_data, 481 bp->b_bcount, 482 bp->b_blkno, 0); 483 } else { 484 if (vnd->sc_keyctx) 485 vndencrypt(vnd, bp->b_data, 486 bp->b_bcount, 487 bp->b_blkno, 1); 488 auio.uio_rw = UIO_WRITE; 489 bp->b_error = VOP_WRITE(vnd->sc_vp, &auio, 0, 490 vnd->sc_cred); 491 /* Data in buffer cache needs to be in clear */ 492 if (vnd->sc_keyctx) 493 vndencrypt(vnd, bp->b_data, 494 bp->b_bcount, 495 bp->b_blkno, 0); 496 } 497 vnd->sc_flags &= ~VNF_BUSY; 498 VOP_UNLOCK(vnd->sc_vp, 0, p); 499 if (bp->b_error) 500 bp->b_flags |= B_ERROR; 501 bp->b_resid = auio.uio_resid; 502 s = splbio(); 503 biodone(bp); 504 splx(s); 505 506 /* If nothing more is queued, we are done. */ 507 if (!vnd->sc_tab.b_active) 508 return; 509 510 /* 511 * Dequeue now since lower level strategy 512 * routine might queue using same links. 513 */ 514 s = splbio(); 515 bp = vnd->sc_tab.b_actf; 516 vnd->sc_tab.b_actf = bp->b_actf; 517 vnd->sc_tab.b_active--; 518 splx(s); 519 } 520 } 521 522 /* The old-style buffercache bypassing method. */ 523 bn += vnd->sc_dk.dk_label->d_partitions[DISKPART(bp->b_dev)].p_offset; 524 bn = dbtob(bn); 525 bsize = vnd->sc_vp->v_mount->mnt_stat.f_iosize; 526 addr = bp->b_data; 527 flags = bp->b_flags | B_CALL; 528 for (resid = bp->b_resid; resid; resid -= sz) { 529 struct vnode *vp; 530 daddr_t nbn; 531 int off, s, nra; 532 533 nra = 0; 534 vn_lock(vnd->sc_vp, LK_RETRY | LK_EXCLUSIVE, p); 535 error = VOP_BMAP(vnd->sc_vp, bn / bsize, &vp, &nbn, &nra); 536 VOP_UNLOCK(vnd->sc_vp, 0, p); 537 if (error == 0 && (long)nbn == -1) 538 error = EIO; 539 #ifdef DEBUG 540 if (!dovndcluster) 541 nra = 0; 542 #endif 543 544 if ((off = bn % bsize) != 0) 545 sz = bsize - off; 546 else 547 sz = (1 + nra) * bsize; 548 if (resid < sz) 549 sz = resid; 550 #ifdef DEBUG 551 if (vnddebug & VDB_IO) 552 printf("vndstrategy: vp %p/%p bn %x/%x sz %x\n", 553 vnd->sc_vp, vp, bn, nbn, sz); 554 #endif 555 556 nbp = getvndbuf(); 557 nbp->vb_buf.b_flags = flags; 558 nbp->vb_buf.b_bcount = sz; 559 nbp->vb_buf.b_bufsize = bp->b_bufsize; 560 nbp->vb_buf.b_error = 0; 561 if (vp->v_type == VBLK || vp->v_type == VCHR) 562 nbp->vb_buf.b_dev = vp->v_rdev; 563 else 564 nbp->vb_buf.b_dev = NODEV; 565 nbp->vb_buf.b_data = addr; 566 nbp->vb_buf.b_blkno = nbn + btodb(off); 567 nbp->vb_buf.b_proc = bp->b_proc; 568 nbp->vb_buf.b_iodone = vndiodone; 569 nbp->vb_buf.b_vp = vp; 570 nbp->vb_buf.b_dirtyoff = bp->b_dirtyoff; 571 nbp->vb_buf.b_dirtyend = bp->b_dirtyend; 572 nbp->vb_buf.b_validoff = bp->b_validoff; 573 nbp->vb_buf.b_validend = bp->b_validend; 574 LIST_INIT(&nbp->vb_buf.b_dep); 575 576 /* save a reference to the old buffer */ 577 nbp->vb_obp = bp; 578 579 /* 580 * If there was an error or a hole in the file...punt. 581 * Note that we deal with this after the nbp allocation. 582 * This ensures that we properly clean up any operations 583 * that we have already fired off. 584 * 585 * XXX we could deal with holes here but it would be 586 * a hassle (in the write case). 587 */ 588 if (error) { 589 nbp->vb_buf.b_error = error; 590 nbp->vb_buf.b_flags |= B_ERROR; 591 bp->b_resid -= (resid - sz); 592 s = splbio(); 593 biodone(&nbp->vb_buf); 594 splx(s); 595 return; 596 } 597 /* 598 * Just sort by block number 599 */ 600 nbp->vb_buf.b_cylin = nbp->vb_buf.b_blkno; 601 s = splbio(); 602 disksort(&vnd->sc_tab, &nbp->vb_buf); 603 if (vnd->sc_tab.b_active < vnd->sc_maxactive) { 604 vnd->sc_tab.b_active++; 605 vndstart(vnd); 606 } 607 splx(s); 608 609 bn += sz; 610 addr += sz; 611 } 612 } 613 614 /* 615 * Feed requests sequentially. 616 * We do it this way to keep from flooding NFS servers if we are connected 617 * to an NFS file. This places the burden on the client rather than the 618 * server. 619 */ 620 void 621 vndstart(vnd) 622 struct vnd_softc *vnd; 623 { 624 struct buf *bp; 625 626 /* 627 * Dequeue now since lower level strategy routine might 628 * queue using same links 629 */ 630 bp = vnd->sc_tab.b_actf; 631 vnd->sc_tab.b_actf = bp->b_actf; 632 #ifdef DEBUG 633 if (vnddebug & VDB_IO) 634 printf("vndstart(%d): bp %p vp %p blkno %x addr %p cnt %lx\n", 635 vnd-vnd_softc, bp, bp->b_vp, bp->b_blkno, bp->b_data, 636 bp->b_bcount); 637 #endif 638 639 /* Instrumentation. */ 640 disk_busy(&vnd->sc_dk); 641 642 if ((bp->b_flags & B_READ) == 0) 643 bp->b_vp->v_numoutput++; 644 VOP_STRATEGY(bp); 645 } 646 647 void 648 vndiodone(bp) 649 struct buf *bp; 650 { 651 struct vndbuf *vbp = (struct vndbuf *) bp; 652 struct buf *pbp = vbp->vb_obp; 653 struct vnd_softc *vnd = &vnd_softc[vndunit(pbp->b_dev)]; 654 long count; 655 656 splassert(IPL_BIO); 657 658 #ifdef DEBUG 659 if (vnddebug & VDB_IO) 660 printf("vndiodone(%d): vbp %p vp %p blkno %x addr %p cnt %lx\n", 661 vnd-vnd_softc, vbp, vbp->vb_buf.b_vp, vbp->vb_buf.b_blkno, 662 vbp->vb_buf.b_data, vbp->vb_buf.b_bcount); 663 #endif 664 665 if (vbp->vb_buf.b_error) { 666 #ifdef DEBUG 667 if (vnddebug & VDB_IO) 668 printf("vndiodone: vbp %p error %d\n", vbp, 669 vbp->vb_buf.b_error); 670 #endif 671 pbp->b_flags |= B_ERROR; 672 pbp->b_error = biowait(&vbp->vb_buf); 673 } 674 pbp->b_resid -= vbp->vb_buf.b_bcount; 675 putvndbuf(vbp); 676 count = pbp->b_bcount - pbp->b_resid; 677 if (pbp->b_resid == 0) { 678 #ifdef DEBUG 679 if (vnddebug & VDB_IO) 680 printf("vndiodone: pbp %p iodone\n", pbp); 681 #endif 682 biodone(pbp); 683 } 684 if (vnd->sc_tab.b_active) { 685 disk_unbusy(&vnd->sc_dk, count); 686 if (vnd->sc_tab.b_actf) 687 vndstart(vnd); 688 else 689 vnd->sc_tab.b_active--; 690 } 691 } 692 693 /* ARGSUSED */ 694 int 695 vndread(dev, uio, flags) 696 dev_t dev; 697 struct uio *uio; 698 int flags; 699 { 700 int unit = vndunit(dev); 701 struct vnd_softc *sc; 702 703 #ifdef DEBUG 704 if (vnddebug & VDB_FOLLOW) 705 printf("vndread(%x, %p)\n", dev, uio); 706 #endif 707 708 if (unit >= numvnd) 709 return (ENXIO); 710 sc = &vnd_softc[unit]; 711 712 if ((sc->sc_flags & VNF_INITED) == 0) 713 return (ENXIO); 714 715 return (physio(vndstrategy, NULL, dev, B_READ, minphys, uio)); 716 } 717 718 /* ARGSUSED */ 719 int 720 vndwrite(dev, uio, flags) 721 dev_t dev; 722 struct uio *uio; 723 int flags; 724 { 725 int unit = vndunit(dev); 726 struct vnd_softc *sc; 727 728 #ifdef DEBUG 729 if (vnddebug & VDB_FOLLOW) 730 printf("vndwrite(%x, %p)\n", dev, uio); 731 #endif 732 733 if (unit >= numvnd) 734 return (ENXIO); 735 sc = &vnd_softc[unit]; 736 737 if ((sc->sc_flags & VNF_INITED) == 0) 738 return (ENXIO); 739 740 return (physio(vndstrategy, NULL, dev, B_WRITE, minphys, uio)); 741 } 742 743 /* ARGSUSED */ 744 int 745 vndioctl(dev, cmd, addr, flag, p) 746 dev_t dev; 747 u_long cmd; 748 caddr_t addr; 749 int flag; 750 struct proc *p; 751 { 752 int unit = vndunit(dev); 753 struct vnd_softc *vnd; 754 struct vnd_ioctl *vio; 755 struct vattr vattr; 756 struct nameidata nd; 757 int error, part, pmask, s; 758 759 #ifdef DEBUG 760 if (vnddebug & VDB_FOLLOW) 761 printf("vndioctl(%x, %lx, %p, %x, %p): unit %d\n", 762 dev, cmd, addr, flag, p, unit); 763 #endif 764 error = suser(p, 0); 765 if (error) 766 return (error); 767 if (unit >= numvnd) 768 return (ENXIO); 769 770 vnd = &vnd_softc[unit]; 771 vio = (struct vnd_ioctl *)addr; 772 switch (cmd) { 773 774 case VNDIOCSET: 775 if (vnd->sc_flags & VNF_INITED) 776 return (EBUSY); 777 if (!(vnd->sc_flags & VNF_SIMPLE) && vio->vnd_keylen) 778 return (EINVAL); 779 780 if ((error = vndlock(vnd)) != 0) 781 return (error); 782 783 bzero(vnd->sc_dev.dv_xname, sizeof(vnd->sc_dev.dv_xname)); 784 if (snprintf(vnd->sc_dev.dv_xname, sizeof(vnd->sc_dev.dv_xname), 785 "vnd%d", unit) >= sizeof(vnd->sc_dev.dv_xname)) { 786 printf("VNDIOCSET: device name too long\n"); 787 vndunlock(vnd); 788 return(ENXIO); 789 } 790 791 /* 792 * Always open for read and write. 793 * This is probably bogus, but it lets vn_open() 794 * weed out directories, sockets, etc. so we don't 795 * have to worry about them. 796 */ 797 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, vio->vnd_file, p); 798 if ((error = vn_open(&nd, FREAD|FWRITE, 0)) != 0) { 799 vndunlock(vnd); 800 return (error); 801 } 802 error = VOP_GETATTR(nd.ni_vp, &vattr, p->p_ucred, p); 803 if (error) { 804 VOP_UNLOCK(nd.ni_vp, 0, p); 805 (void) vn_close(nd.ni_vp, FREAD|FWRITE, p->p_ucred, p); 806 vndunlock(vnd); 807 return (error); 808 } 809 VOP_UNLOCK(nd.ni_vp, 0, p); 810 vnd->sc_vp = nd.ni_vp; 811 vnd->sc_size = btodb(vattr.va_size); /* note truncation */ 812 if ((error = vndsetcred(vnd, p->p_ucred)) != 0) { 813 (void) vn_close(nd.ni_vp, FREAD|FWRITE, p->p_ucred, p); 814 vndunlock(vnd); 815 return (error); 816 } 817 818 if (vio->vnd_keylen) { 819 char *key; 820 821 key = malloc(vio->vnd_keylen, M_TEMP, M_WAITOK); 822 if ((error = copyin((caddr_t)vio->vnd_key, key, 823 vio->vnd_keylen)) != 0) { 824 (void) vn_close(nd.ni_vp, FREAD|FWRITE, 825 p->p_ucred, p); 826 vndunlock(vnd); 827 return (error); 828 } 829 830 vnd->sc_keyctx = malloc(sizeof(blf_ctx), M_DEVBUF, 831 M_WAITOK); 832 blf_key(vnd->sc_keyctx, key, vio->vnd_keylen); 833 bzero(key, vio->vnd_keylen); 834 free((caddr_t)key, M_TEMP); 835 } else 836 vnd->sc_keyctx = NULL; 837 838 vndthrottle(vnd, vnd->sc_vp); 839 vio->vnd_size = dbtob((off_t)vnd->sc_size); 840 vnd->sc_flags |= VNF_INITED; 841 #ifdef DEBUG 842 if (vnddebug & VDB_INIT) 843 printf("vndioctl: SET vp %p size %x\n", 844 vnd->sc_vp, vnd->sc_size); 845 #endif 846 847 /* Attach the disk. */ 848 vnd->sc_dk.dk_driver = &vnddkdriver; 849 vnd->sc_dk.dk_name = vnd->sc_dev.dv_xname; 850 disk_attach(&vnd->sc_dk); 851 dk_establish(&vnd->sc_dk, &vnd->sc_dev); 852 853 vndunlock(vnd); 854 855 break; 856 857 case VNDIOCCLR: 858 if ((vnd->sc_flags & VNF_INITED) == 0) 859 return (ENXIO); 860 861 if ((error = vndlock(vnd)) != 0) 862 return (error); 863 864 /* 865 * Don't unconfigure if any other partitions are open 866 * or if both the character and block flavors of this 867 * partition are open. 868 */ 869 part = DISKPART(dev); 870 pmask = (1 << part); 871 if ((vnd->sc_dk.dk_openmask & ~pmask) || 872 ((vnd->sc_dk.dk_bopenmask & pmask) && 873 (vnd->sc_dk.dk_copenmask & pmask))) { 874 vndunlock(vnd); 875 return (EBUSY); 876 } 877 878 vndclear(vnd); 879 #ifdef DEBUG 880 if (vnddebug & VDB_INIT) 881 printf("vndioctl: CLRed\n"); 882 #endif 883 /* Free crypto key */ 884 if (vnd->sc_keyctx) { 885 bzero(vnd->sc_keyctx, vio->vnd_keylen); 886 free((caddr_t)vnd->sc_keyctx, M_DEVBUF); 887 } 888 889 /* Detatch the disk. */ 890 disk_detach(&vnd->sc_dk); 891 892 /* This must be atomic. */ 893 s = splhigh(); 894 vndunlock(vnd); 895 bzero(vnd, sizeof(struct vnd_softc)); 896 splx(s); 897 break; 898 899 case DIOCGDINFO: 900 if ((vnd->sc_flags & VNF_HAVELABEL) == 0) 901 return (ENOTTY); 902 *(struct disklabel *)addr = *(vnd->sc_dk.dk_label); 903 return (0); 904 905 case DIOCGPART: 906 if ((vnd->sc_flags & VNF_HAVELABEL) == 0) 907 return (ENOTTY); 908 ((struct partinfo *)addr)->disklab = vnd->sc_dk.dk_label; 909 ((struct partinfo *)addr)->part = 910 &vnd->sc_dk.dk_label->d_partitions[DISKPART(dev)]; 911 return (0); 912 913 case DIOCWDINFO: 914 case DIOCSDINFO: 915 if ((vnd->sc_flags & VNF_HAVELABEL) == 0) 916 return (ENOTTY); 917 if ((flag & FWRITE) == 0) 918 return (EBADF); 919 920 if ((error = vndlock(vnd)) != 0) 921 return (error); 922 vnd->sc_flags |= VNF_LABELLING; 923 924 error = setdisklabel(vnd->sc_dk.dk_label, 925 (struct disklabel *)addr, /*vnd->sc_dk.dk_openmask : */0, 926 vnd->sc_dk.dk_cpulabel); 927 if (error == 0) { 928 if (cmd == DIOCWDINFO) 929 error = writedisklabel(MAKEDISKDEV(major(dev), 930 DISKUNIT(dev), RAW_PART), 931 vndstrategy, vnd->sc_dk.dk_label, 932 vnd->sc_dk.dk_cpulabel); 933 } 934 935 vnd->sc_flags &= ~VNF_LABELLING; 936 vndunlock(vnd); 937 return (error); 938 939 case DIOCWLABEL: 940 if ((flag & FWRITE) == 0) 941 return (EBADF); 942 if (*(int *)addr) 943 vnd->sc_flags |= VNF_WLABEL; 944 else 945 vnd->sc_flags &= ~VNF_WLABEL; 946 return (0); 947 948 default: 949 return (ENOTTY); 950 } 951 952 return (0); 953 } 954 955 /* 956 * Duplicate the current processes' credentials. Since we are called only 957 * as the result of a SET ioctl and only root can do that, any future access 958 * to this "disk" is essentially as root. Note that credentials may change 959 * if some other uid can write directly to the mapped file (NFS). 960 */ 961 int 962 vndsetcred(vnd, cred) 963 struct vnd_softc *vnd; 964 struct ucred *cred; 965 { 966 struct uio auio; 967 struct iovec aiov; 968 char *tmpbuf; 969 int error; 970 struct proc *p = curproc; 971 972 vnd->sc_cred = crdup(cred); 973 tmpbuf = malloc(DEV_BSIZE, M_TEMP, M_WAITOK); 974 975 /* XXX: Horrible kludge to establish credentials for NFS */ 976 aiov.iov_base = tmpbuf; 977 aiov.iov_len = min(DEV_BSIZE, dbtob(vnd->sc_size)); 978 auio.uio_iov = &aiov; 979 auio.uio_iovcnt = 1; 980 auio.uio_offset = 0; 981 auio.uio_rw = UIO_READ; 982 auio.uio_segflg = UIO_SYSSPACE; 983 auio.uio_resid = aiov.iov_len; 984 vn_lock(vnd->sc_vp, LK_RETRY | LK_EXCLUSIVE, p); 985 error = VOP_READ(vnd->sc_vp, &auio, 0, vnd->sc_cred); 986 VOP_UNLOCK(vnd->sc_vp, 0, p); 987 988 free(tmpbuf, M_TEMP); 989 return (error); 990 } 991 992 /* 993 * Set maxactive based on FS type 994 */ 995 void 996 vndthrottle(vnd, vp) 997 struct vnd_softc *vnd; 998 struct vnode *vp; 999 { 1000 #ifdef NFSCLIENT 1001 extern int (**nfsv2_vnodeop_p)(void *); 1002 1003 if (vp->v_op == nfsv2_vnodeop_p) 1004 vnd->sc_maxactive = 2; 1005 else 1006 #endif 1007 vnd->sc_maxactive = 8; 1008 } 1009 1010 void 1011 vndshutdown() 1012 { 1013 struct vnd_softc *vnd; 1014 1015 for (vnd = &vnd_softc[0]; vnd < &vnd_softc[numvnd]; vnd++) 1016 if (vnd->sc_flags & VNF_INITED) 1017 vndclear(vnd); 1018 } 1019 1020 void 1021 vndclear(vnd) 1022 struct vnd_softc *vnd; 1023 { 1024 struct vnode *vp = vnd->sc_vp; 1025 struct proc *p = curproc; /* XXX */ 1026 1027 #ifdef DEBUG 1028 if (vnddebug & VDB_FOLLOW) 1029 printf("vndclear(%p): vp %p\n", vnd, vp); 1030 #endif 1031 vnd->sc_flags &= ~VNF_INITED; 1032 if (vp == (struct vnode *)0) 1033 panic("vndioctl: null vp"); 1034 (void) vn_close(vp, FREAD|FWRITE, vnd->sc_cred, p); 1035 crfree(vnd->sc_cred); 1036 vnd->sc_vp = (struct vnode *)0; 1037 vnd->sc_cred = (struct ucred *)0; 1038 vnd->sc_size = 0; 1039 } 1040 1041 int 1042 vndsize(dev) 1043 dev_t dev; 1044 { 1045 int unit = vndunit(dev); 1046 struct vnd_softc *vnd = &vnd_softc[unit]; 1047 1048 if (unit >= numvnd || (vnd->sc_flags & VNF_INITED) == 0) 1049 return (-1); 1050 return (vnd->sc_size); 1051 } 1052 1053 int 1054 vnddump(dev, blkno, va, size) 1055 dev_t dev; 1056 daddr_t blkno; 1057 caddr_t va; 1058 size_t size; 1059 { 1060 1061 /* Not implemented. */ 1062 return (ENXIO); 1063 } 1064 1065 /* 1066 * Wait interruptibly for an exclusive lock. 1067 * 1068 * XXX 1069 * Several drivers do this; it should be abstracted and made MP-safe. 1070 */ 1071 int 1072 vndlock(sc) 1073 struct vnd_softc *sc; 1074 { 1075 int error; 1076 1077 while ((sc->sc_flags & VNF_LOCKED) != 0) { 1078 sc->sc_flags |= VNF_WANTED; 1079 if ((error = tsleep(sc, PRIBIO | PCATCH, "vndlck", 0)) != 0) 1080 return (error); 1081 } 1082 sc->sc_flags |= VNF_LOCKED; 1083 return (0); 1084 } 1085 1086 /* 1087 * Unlock and wake up any waiters. 1088 */ 1089 void 1090 vndunlock(sc) 1091 struct vnd_softc *sc; 1092 { 1093 1094 sc->sc_flags &= ~VNF_LOCKED; 1095 if ((sc->sc_flags & VNF_WANTED) != 0) { 1096 sc->sc_flags &= ~VNF_WANTED; 1097 wakeup(sc); 1098 } 1099 } 1100