1 /* $OpenBSD: vnd.c,v 1.38 2003/06/02 23:28:01 millert Exp $ */ 2 /* $NetBSD: vnd.c,v 1.26 1996/03/30 23:06:11 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1988 University of Utah. 6 * Copyright (c) 1990, 1993 7 * The Regents of the University of California. All rights reserved. 8 * 9 * This code is derived from software contributed to Berkeley by 10 * the Systems Programming Group of the University of Utah Computer 11 * Science Department. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 
 *
 * from: Utah $Hdr: vn.c 1.13 94/04/02$
 *
 *	@(#)vn.c	8.6 (Berkeley) 4/1/94
 */

/*
 * Vnode disk driver.
 *
 * Block/character interface to a vnode.  Allows one to treat a file
 * as a disk (e.g. build a filesystem in it, mount it, etc.).
 *
 * NOTE 1: This uses either the VOP_BMAP/VOP_STRATEGY interface to the
 * vnode or simple VOP_READ/VOP_WRITE.  The former is suitable for swapping
 * as it doesn't distort the local buffer cache.  The latter is good for
 * building disk images as it keeps the cache consistent after the block
 * device is closed.
 *
 * NOTE 2: There is a security issue involved with this driver.
 * Once mounted all access to the contents of the "mapped" file via
 * the special file is controlled by the permissions on the special
 * file, the protection of the mapped file is ignored (effectively,
 * by using root credentials in all transactions).
 *
 * NOTE 3: Doesn't interact with leases, should it?
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/proc.h>
#include <sys/errno.h>
#include <sys/buf.h>
#include <sys/malloc.h>
#include <sys/ioctl.h>
#include <sys/disklabel.h>
#include <sys/device.h>
#include <sys/disk.h>
#include <sys/stat.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/uio.h>
#include <sys/conf.h>

#include <crypto/blf.h>

#include <miscfs/specfs/specdev.h>

#include <dev/vndioctl.h>

#ifdef DEBUG
int dovndcluster = 1;		/* when zero, disable VOP_BMAP read-ahead clustering */
int vnddebug = 0x00;		/* bitmask of the VDB_* trace categories below */
#define VDB_FOLLOW	0x01	/* trace entry to each driver entry point */
#define VDB_INIT	0x02	/* trace configure/unconfigure */
#define VDB_IO		0x04	/* trace individual I/O transfers */
#endif

/* b_resid is unused while a vndbuf is queued, so it is borrowed as the sort key */
#define b_cylin	b_resid

/* The top bit of the unit number selects the VOP_READ/VOP_WRITE ("simple") mode. */
#define	vndunit(x)	DISKUNIT((x) & 0x7f)
#define	vndsimple(x)	((x) & 0x80)
#define	MAKEVNDDEV(maj, unit, part)	MAKEDISKDEV(maj, unit, part)

#define	VNDLABELDEV(dev)	(MAKEVNDDEV(major(dev), vndunit(dev), RAW_PART))

/*
 * Per-transfer wrapper used by the VOP_BMAP/VOP_STRATEGY path: one nested
 * buf per file-system-block-sized piece, plus a pointer back to the
 * original caller's buf so vndiodone() can account the piece against it.
 */
struct vndbuf {
	struct buf	vb_buf;		/* the buf handed to the underlying device */
	struct buf	*vb_obp;	/* originating (caller's) buf */
};

#define	getvndbuf()	\
	((struct vndbuf *)malloc(sizeof(struct vndbuf), M_DEVBUF, M_WAITOK))
#define putvndbuf(vbp)	\
	free((caddr_t)(vbp), M_DEVBUF)

/*
 * Soft state for one vnd unit.  Allocated as an array in vndattach()
 * and cleared wholesale on unconfigure (VNDIOCCLR).
 */
struct vnd_softc {
	struct device	 sc_dev;
	struct disk	 sc_dk;

	int		 sc_flags;	/* flags (VNF_*) */
	size_t		 sc_size;	/* size of vnd in blocks */
	struct vnode	*sc_vp;		/* vnode of the mapped file */
	struct ucred	*sc_cred;	/* credentials used for all file I/O */
	int		 sc_maxactive;	/* max # of active requests */
	struct buf	 sc_tab;	/* transfer queue head */
	void		*sc_keyctx;	/* blowfish key context, NULL if unencrypted */
};

/* sc_flags */
#define	VNF_ALIVE	0x0001
#define VNF_INITED	0x0002	/* unit is configured (VNDIOCSET done) */
#define VNF_WANTED	0x0040	/* someone sleeps on the unit lock */
#define VNF_LOCKED	0x0080	/* unit lock held (see vndlock/vndunlock) */
#define VNF_LABELLING	0x0100	/* in-progress DIOCWDINFO/DIOCSDINFO */
#define VNF_WLABEL	0x0200	/* label area may be written */
#define VNF_HAVELABEL	0x0400	/* disklabel has been read/faked */
#define	VNF_BUSY	0x0800	/* simple-mode I/O in progress (recursion guard) */
#define	VNF_SIMPLE	0x1000	/* unit opened in VOP_READ/VOP_WRITE mode */

struct vnd_softc *vnd_softc;	/* array of numvnd units, see vndattach() */
int numvnd = 0;

struct dkdriver vnddkdriver = { vndstrategy };

/* called by main() at boot time */
void	vndattach(int);

void	vndclear(struct vnd_softc *);
void	vndstart(struct vnd_softc *);
int	vndsetcred(struct vnd_softc *, struct ucred *);
void	vndthrottle(struct vnd_softc *, struct vnode *);
void	vndiodone(struct buf *);
void	vndshutdown(void);
void	vndgetdisklabel(dev_t, struct vnd_softc *);
void	vndencrypt(struct vnd_softc *, caddr_t, size_t, daddr_t, int);

int	vndlock(struct vnd_softc *);
void	vndunlock(struct vnd_softc *);

/*
 * Encrypt or decrypt (per `encrypt`) `size` bytes at `addr` in place,
 * one DEV_BSIZE block at a time, using blowfish CBC with an IV derived
 * from the disk block number `off` (so identical plaintext blocks at
 * different offsets encrypt differently).
 *
 * Note the loop bound size/bsize truncates: a trailing partial block
 * (size not a multiple of DEV_BSIZE) is left untouched.  Callers pass
 * bp->b_bcount, which is block-aligned for disk transfers.
 */
void
vndencrypt(vnd, addr, size, off, encrypt)
	struct vnd_softc *vnd;
	caddr_t addr;
	size_t size;
	daddr_t off;
	int encrypt;
{
	int i, bsize;
	u_char iv[8];

	bsize = dbtob(1);
	for (i = 0; i < size/bsize; i++) {
		/* IV = blowfish-ECB(block number), zero-padded to 8 bytes */
		bzero(iv, sizeof(iv));
		bcopy((u_char *)&off, iv, sizeof(off));
		blf_ecb_encrypt(vnd->sc_keyctx, iv, sizeof(iv));
		if (encrypt)
			blf_cbc_encrypt(vnd->sc_keyctx, iv, addr, bsize);
		else
			blf_cbc_decrypt(vnd->sc_keyctx, iv, addr, bsize);

		addr += bsize;
		off++;
	}
}

/*
 * Autoconf hook: allocate and zero the softc array for `num` units.
 * On allocation failure the driver is simply left with numvnd == 0.
 */
void
vndattach(num)
	int num;
{
	char *mem;
	u_long size;

	if (num <= 0)
		return;
	size = num * sizeof(struct vnd_softc);
	mem = malloc(size, M_DEVBUF, M_NOWAIT);
	if (mem == NULL) {
		printf("WARNING: no memory for vnode disks\n");
		return;
	}
	bzero(mem, size);
	vnd_softc = (struct vnd_softc *)mem;
	numvnd = num;
}

/*
 * Open entry point (block and character).  Reads/fakes the disklabel on
 * first open of a configured unit, enforces a single access mode
 * (simple vs. bmap, encoded in the minor number) across all open
 * partitions, validates the partition, and records it in the open masks.
 */
int
vndopen(dev, flags, mode, p)
	dev_t dev;
	int flags, mode;
	struct proc *p;
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;
	int error = 0, part, pmask;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndopen(%x, %x, %x, %p)\n", dev, flags, mode, p);
#endif
	if (unit >= numvnd)
		return (ENXIO);
	sc = &vnd_softc[unit];

	if ((error = vndlock(sc)) != 0)
		return (error);

	/* First open after configuration: pick up the label. */
	if ((sc->sc_flags & VNF_INITED) &&
	    (sc->sc_flags & VNF_HAVELABEL) == 0) {
		sc->sc_flags |= VNF_HAVELABEL;
		vndgetdisklabel(dev, sc);
	}

	part = DISKPART(dev);
	pmask = 1 << part;

	/*
	 * If any partition is open, all succeeding openings must be of the
	 * same type.
	 */
	if (sc->sc_dk.dk_openmask) {
		if (((sc->sc_flags & VNF_SIMPLE) != 0) !=
		    (vndsimple(dev) != 0)) {
			error = EBUSY;
			goto bad;
		}
	} else if (vndsimple(dev))
		sc->sc_flags |= VNF_SIMPLE;
	else
		sc->sc_flags &= ~VNF_SIMPLE;

	/* Check that the partition exists. */
	if (part != RAW_PART &&
	    ((sc->sc_flags & VNF_HAVELABEL) == 0 ||
	    part >= sc->sc_dk.dk_label->d_npartitions ||
	    sc->sc_dk.dk_label->d_partitions[part].p_fstype == FS_UNUSED)) {
		error = ENXIO;
		goto bad;
	}

	/* Prevent our unit from being unconfigured while open. */
	switch (mode) {
	case S_IFCHR:
		sc->sc_dk.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		sc->sc_dk.dk_bopenmask |= pmask;
		break;
	}
	sc->sc_dk.dk_openmask =
	    sc->sc_dk.dk_copenmask | sc->sc_dk.dk_bopenmask;

	vndunlock(sc);
	return (0);
bad:
	vndunlock(sc);
	return (error);
}

/*
 * Load the label information on the named device.
 *
 * A fictitious geometry (1 track, 100 sectors/track) covering the whole
 * file is faked first, then readdisklabel() is given a chance to replace
 * it with a real on-"disk" label.  A read failure silently keeps the
 * fictitious label.
 */
void
vndgetdisklabel(dev, sc)
	dev_t dev;
	struct vnd_softc *sc;
{
	struct disklabel *lp = sc->sc_dk.dk_label;
	char *errstring;

	bzero(lp, sizeof(struct disklabel));
	bzero(sc->sc_dk.dk_cpulabel, sizeof(struct cpu_disklabel));

	lp->d_secsize = 512;
	lp->d_ntracks = 1;
	lp->d_nsectors = 100;
	lp->d_ncylinders = sc->sc_size / 100;
	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
	if (lp->d_secpercyl == 0) {
		lp->d_secpercyl = 100;
		/* as long as it's not 0 - readdisklabel divides by it (?) */
	}

	strncpy(lp->d_typename, "vnd device", sizeof(lp->d_typename));
	lp->d_type = DTYPE_SCSI;
	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
	lp->d_secperunit = sc->sc_size;
	lp->d_rpm = 3600;
	lp->d_interleave = 1;
	lp->d_flags = 0;

	/* Raw partition spans the whole unit. */
	lp->d_partitions[RAW_PART].p_offset = 0;
	lp->d_partitions[RAW_PART].p_size =
	    lp->d_secperunit * (lp->d_secsize / DEV_BSIZE);
	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
	lp->d_npartitions = RAW_PART + 1;

	lp->d_magic = DISKMAGIC;
	lp->d_magic2 = DISKMAGIC;
	lp->d_checksum = dkcksum(lp);

	/*
	 * Call the generic disklabel extraction routine
	 */
	errstring = readdisklabel(VNDLABELDEV(dev), vndstrategy, lp,
	    sc->sc_dk.dk_cpulabel, 0);
	if (errstring) {
		/*printf("%s: %s\n", sc->sc_dev.dv_xname, errstring);*/
		return;
	}
}

/*
 * Close entry point: clear this partition from the relevant open mask.
 * Unconfiguration itself happens only via the VNDIOCCLR ioctl.
 */
int
vndclose(dev, flags, mode, p)
	dev_t dev;
	int flags, mode;
	struct proc *p;
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;
	int error = 0, part;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndclose(%x, %x, %x, %p)\n", dev, flags, mode, p);
#endif

	if (unit >= numvnd)
		return (ENXIO);
	sc = &vnd_softc[unit];

	if ((error = vndlock(sc)) != 0)
		return (error);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (mode) {
	case S_IFCHR:
		sc->sc_dk.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		sc->sc_dk.dk_bopenmask &= ~(1 << part);
		break;
	}
	sc->sc_dk.dk_openmask =
	    sc->sc_dk.dk_copenmask | sc->sc_dk.dk_bopenmask;

	vndunlock(sc);
	return (0);
}

/*
 * Two methods are used, the traditional buffercache bypassing and the
 * newer, cache-coherent on unmount, one.
 *
 * Former method:
 * Break the request into bsize pieces and submit using VOP_BMAP/VOP_STRATEGY.
 * Note that this driver can only be used for swapping over NFS on the hp
 * since nfs_strategy on the vax cannot handle u-areas and page tables.
 *
 * Latter method:
 * Repack the buffer into an uio structure and use VOP_READ/VOP_WRITE to
 * access the underlying file.  Things are complicated by the fact that we
 * might get recursively called due to buffer flushes.  In those cases we
 * queue one write.
385 */ 386 void 387 vndstrategy(bp) 388 struct buf *bp; 389 { 390 int unit = vndunit(bp->b_dev); 391 struct vnd_softc *vnd = &vnd_softc[unit]; 392 struct vndbuf *nbp; 393 int bsize; 394 off_t bn; 395 caddr_t addr; 396 size_t resid; 397 int sz, flags, error, s; 398 struct iovec aiov; 399 struct uio auio; 400 struct proc *p = curproc; 401 402 #ifdef DEBUG 403 if (vnddebug & VDB_FOLLOW) 404 printf("vndstrategy(%p): unit %d\n", bp, unit); 405 #endif 406 if ((vnd->sc_flags & VNF_INITED) == 0) { 407 bp->b_error = ENXIO; 408 bp->b_flags |= B_ERROR; 409 s = splbio(); 410 biodone(bp); 411 splx(s); 412 return; 413 } 414 415 bn = bp->b_blkno; 416 sz = howmany(bp->b_bcount, DEV_BSIZE); 417 bp->b_resid = bp->b_bcount; 418 if (bn < 0) { 419 bp->b_error = EINVAL; 420 bp->b_flags |= B_ERROR; 421 s = splbio(); 422 biodone(bp); 423 splx(s); 424 return; 425 } 426 if (DISKPART(bp->b_dev) != RAW_PART && 427 bounds_check_with_label(bp, vnd->sc_dk.dk_label, 428 vnd->sc_dk.dk_cpulabel, 1) <= 0) { 429 s = splbio(); 430 biodone(bp); 431 splx(s); 432 return; 433 } 434 435 /* No bypassing of buffer cache? */ 436 if (vndsimple(bp->b_dev)) { 437 /* 438 * In order to avoid "locking against myself" panics, we 439 * must be prepared to queue operations during another I/O 440 * operation. This situation comes up where a dirty cache 441 * buffer needs to be flushed in order to provide the current 442 * operation with a fresh buffer. 443 * 444 * XXX do we really need to protect stuff relating to this with 445 * splbio? 446 */ 447 if (vnd->sc_flags & VNF_BUSY) { 448 s = splbio(); 449 bp->b_actf = vnd->sc_tab.b_actf; 450 vnd->sc_tab.b_actf = bp; 451 vnd->sc_tab.b_active++; 452 splx(s); 453 return; 454 } 455 456 /* Loop until all queued requests are handled. 
*/ 457 for (;;) { 458 int part = DISKPART(bp->b_dev); 459 int off = vnd->sc_dk.dk_label->d_partitions[part].p_offset; 460 461 aiov.iov_base = bp->b_data; 462 auio.uio_resid = aiov.iov_len = bp->b_bcount; 463 auio.uio_iov = &aiov; 464 auio.uio_iovcnt = 1; 465 auio.uio_offset = dbtob((off_t)(bp->b_blkno + off)); 466 auio.uio_segflg = UIO_SYSSPACE; 467 auio.uio_procp = NULL; 468 469 vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY, p); 470 vnd->sc_flags |= VNF_BUSY; 471 if (bp->b_flags & B_READ) { 472 auio.uio_rw = UIO_READ; 473 bp->b_error = VOP_READ(vnd->sc_vp, &auio, 0, 474 vnd->sc_cred); 475 if (vnd->sc_keyctx) 476 vndencrypt(vnd, bp->b_data, 477 bp->b_bcount, 478 bp->b_blkno, 0); 479 } else { 480 if (vnd->sc_keyctx) 481 vndencrypt(vnd, bp->b_data, 482 bp->b_bcount, 483 bp->b_blkno, 1); 484 auio.uio_rw = UIO_WRITE; 485 bp->b_error = VOP_WRITE(vnd->sc_vp, &auio, 0, 486 vnd->sc_cred); 487 /* Data in buffer cache needs to be in clear */ 488 if (vnd->sc_keyctx) 489 vndencrypt(vnd, bp->b_data, 490 bp->b_bcount, 491 bp->b_blkno, 0); 492 } 493 vnd->sc_flags &= ~VNF_BUSY; 494 VOP_UNLOCK(vnd->sc_vp, 0, p); 495 if (bp->b_error) 496 bp->b_flags |= B_ERROR; 497 bp->b_resid = auio.uio_resid; 498 s = splbio(); 499 biodone(bp); 500 splx(s); 501 502 /* If nothing more is queued, we are done. */ 503 if (!vnd->sc_tab.b_active) 504 return; 505 506 /* 507 * Dequeue now since lower level strategy 508 * routine might queue using same links. 509 */ 510 s = splbio(); 511 bp = vnd->sc_tab.b_actf; 512 vnd->sc_tab.b_actf = bp->b_actf; 513 vnd->sc_tab.b_active--; 514 splx(s); 515 } 516 } 517 518 /* The old-style buffercache bypassing method. 
*/ 519 bn += vnd->sc_dk.dk_label->d_partitions[DISKPART(bp->b_dev)].p_offset; 520 bn = dbtob(bn); 521 bsize = vnd->sc_vp->v_mount->mnt_stat.f_iosize; 522 addr = bp->b_data; 523 flags = bp->b_flags | B_CALL; 524 for (resid = bp->b_resid; resid; resid -= sz) { 525 struct vnode *vp; 526 daddr_t nbn; 527 int off, s, nra; 528 529 nra = 0; 530 vn_lock(vnd->sc_vp, LK_RETRY | LK_EXCLUSIVE, p); 531 error = VOP_BMAP(vnd->sc_vp, bn / bsize, &vp, &nbn, &nra); 532 VOP_UNLOCK(vnd->sc_vp, 0, p); 533 if (error == 0 && (long)nbn == -1) 534 error = EIO; 535 #ifdef DEBUG 536 if (!dovndcluster) 537 nra = 0; 538 #endif 539 540 if ((off = bn % bsize) != 0) 541 sz = bsize - off; 542 else 543 sz = (1 + nra) * bsize; 544 if (resid < sz) 545 sz = resid; 546 #ifdef DEBUG 547 if (vnddebug & VDB_IO) 548 printf("vndstrategy: vp %p/%p bn %x/%x sz %x\n", 549 vnd->sc_vp, vp, bn, nbn, sz); 550 #endif 551 552 nbp = getvndbuf(); 553 nbp->vb_buf.b_flags = flags; 554 nbp->vb_buf.b_bcount = sz; 555 nbp->vb_buf.b_bufsize = bp->b_bufsize; 556 nbp->vb_buf.b_error = 0; 557 if (vp->v_type == VBLK || vp->v_type == VCHR) 558 nbp->vb_buf.b_dev = vp->v_rdev; 559 else 560 nbp->vb_buf.b_dev = NODEV; 561 nbp->vb_buf.b_data = addr; 562 nbp->vb_buf.b_blkno = nbn + btodb(off); 563 nbp->vb_buf.b_proc = bp->b_proc; 564 nbp->vb_buf.b_iodone = vndiodone; 565 nbp->vb_buf.b_vp = vp; 566 nbp->vb_buf.b_dirtyoff = bp->b_dirtyoff; 567 nbp->vb_buf.b_dirtyend = bp->b_dirtyend; 568 nbp->vb_buf.b_validoff = bp->b_validoff; 569 nbp->vb_buf.b_validend = bp->b_validend; 570 LIST_INIT(&nbp->vb_buf.b_dep); 571 572 /* save a reference to the old buffer */ 573 nbp->vb_obp = bp; 574 575 /* 576 * If there was an error or a hole in the file...punt. 577 * Note that we deal with this after the nbp allocation. 578 * This ensures that we properly clean up any operations 579 * that we have already fired off. 580 * 581 * XXX we could deal with holes here but it would be 582 * a hassle (in the write case). 
583 */ 584 if (error) { 585 nbp->vb_buf.b_error = error; 586 nbp->vb_buf.b_flags |= B_ERROR; 587 bp->b_resid -= (resid - sz); 588 s = splbio(); 589 biodone(&nbp->vb_buf); 590 splx(s); 591 return; 592 } 593 /* 594 * Just sort by block number 595 */ 596 nbp->vb_buf.b_cylin = nbp->vb_buf.b_blkno; 597 s = splbio(); 598 disksort(&vnd->sc_tab, &nbp->vb_buf); 599 if (vnd->sc_tab.b_active < vnd->sc_maxactive) { 600 vnd->sc_tab.b_active++; 601 vndstart(vnd); 602 } 603 splx(s); 604 605 bn += sz; 606 addr += sz; 607 } 608 } 609 610 /* 611 * Feed requests sequentially. 612 * We do it this way to keep from flooding NFS servers if we are connected 613 * to an NFS file. This places the burden on the client rather than the 614 * server. 615 */ 616 void 617 vndstart(vnd) 618 struct vnd_softc *vnd; 619 { 620 struct buf *bp; 621 622 /* 623 * Dequeue now since lower level strategy routine might 624 * queue using same links 625 */ 626 bp = vnd->sc_tab.b_actf; 627 vnd->sc_tab.b_actf = bp->b_actf; 628 #ifdef DEBUG 629 if (vnddebug & VDB_IO) 630 printf("vndstart(%d): bp %p vp %p blkno %x addr %p cnt %lx\n", 631 vnd-vnd_softc, bp, bp->b_vp, bp->b_blkno, bp->b_data, 632 bp->b_bcount); 633 #endif 634 635 /* Instrumentation. 
*/ 636 disk_busy(&vnd->sc_dk); 637 638 if ((bp->b_flags & B_READ) == 0) 639 bp->b_vp->v_numoutput++; 640 VOP_STRATEGY(bp); 641 } 642 643 void 644 vndiodone(bp) 645 struct buf *bp; 646 { 647 struct vndbuf *vbp = (struct vndbuf *) bp; 648 struct buf *pbp = vbp->vb_obp; 649 struct vnd_softc *vnd = &vnd_softc[vndunit(pbp->b_dev)]; 650 long count; 651 652 splassert(IPL_BIO); 653 654 #ifdef DEBUG 655 if (vnddebug & VDB_IO) 656 printf("vndiodone(%d): vbp %p vp %p blkno %x addr %p cnt %lx\n", 657 vnd-vnd_softc, vbp, vbp->vb_buf.b_vp, vbp->vb_buf.b_blkno, 658 vbp->vb_buf.b_data, vbp->vb_buf.b_bcount); 659 #endif 660 661 if (vbp->vb_buf.b_error) { 662 #ifdef DEBUG 663 if (vnddebug & VDB_IO) 664 printf("vndiodone: vbp %p error %d\n", vbp, 665 vbp->vb_buf.b_error); 666 #endif 667 pbp->b_flags |= B_ERROR; 668 pbp->b_error = biowait(&vbp->vb_buf); 669 } 670 pbp->b_resid -= vbp->vb_buf.b_bcount; 671 putvndbuf(vbp); 672 count = pbp->b_bcount - pbp->b_resid; 673 if (pbp->b_resid == 0) { 674 #ifdef DEBUG 675 if (vnddebug & VDB_IO) 676 printf("vndiodone: pbp %p iodone\n", pbp); 677 #endif 678 biodone(pbp); 679 } 680 if (vnd->sc_tab.b_active) { 681 disk_unbusy(&vnd->sc_dk, count); 682 if (vnd->sc_tab.b_actf) 683 vndstart(vnd); 684 else 685 vnd->sc_tab.b_active--; 686 } 687 } 688 689 /* ARGSUSED */ 690 int 691 vndread(dev, uio, flags) 692 dev_t dev; 693 struct uio *uio; 694 int flags; 695 { 696 int unit = vndunit(dev); 697 struct vnd_softc *sc; 698 699 #ifdef DEBUG 700 if (vnddebug & VDB_FOLLOW) 701 printf("vndread(%x, %p)\n", dev, uio); 702 #endif 703 704 if (unit >= numvnd) 705 return (ENXIO); 706 sc = &vnd_softc[unit]; 707 708 if ((sc->sc_flags & VNF_INITED) == 0) 709 return (ENXIO); 710 711 return (physio(vndstrategy, NULL, dev, B_READ, minphys, uio)); 712 } 713 714 /* ARGSUSED */ 715 int 716 vndwrite(dev, uio, flags) 717 dev_t dev; 718 struct uio *uio; 719 int flags; 720 { 721 int unit = vndunit(dev); 722 struct vnd_softc *sc; 723 724 #ifdef DEBUG 725 if (vnddebug & VDB_FOLLOW) 
726 printf("vndwrite(%x, %p)\n", dev, uio); 727 #endif 728 729 if (unit >= numvnd) 730 return (ENXIO); 731 sc = &vnd_softc[unit]; 732 733 if ((sc->sc_flags & VNF_INITED) == 0) 734 return (ENXIO); 735 736 return (physio(vndstrategy, NULL, dev, B_WRITE, minphys, uio)); 737 } 738 739 /* ARGSUSED */ 740 int 741 vndioctl(dev, cmd, addr, flag, p) 742 dev_t dev; 743 u_long cmd; 744 caddr_t addr; 745 int flag; 746 struct proc *p; 747 { 748 int unit = vndunit(dev); 749 struct vnd_softc *vnd; 750 struct vnd_ioctl *vio; 751 struct vattr vattr; 752 struct nameidata nd; 753 int error, part, pmask, s; 754 755 #ifdef DEBUG 756 if (vnddebug & VDB_FOLLOW) 757 printf("vndioctl(%x, %lx, %p, %x, %p): unit %d\n", 758 dev, cmd, addr, flag, p, unit); 759 #endif 760 error = suser(p->p_ucred, &p->p_acflag); 761 if (error) 762 return (error); 763 if (unit >= numvnd) 764 return (ENXIO); 765 766 vnd = &vnd_softc[unit]; 767 vio = (struct vnd_ioctl *)addr; 768 switch (cmd) { 769 770 case VNDIOCSET: 771 if (vnd->sc_flags & VNF_INITED) 772 return (EBUSY); 773 if (!(vnd->sc_flags & VNF_SIMPLE) && vio->vnd_keylen) 774 return (EINVAL); 775 776 if ((error = vndlock(vnd)) != 0) 777 return (error); 778 779 bzero(vnd->sc_dev.dv_xname, sizeof(vnd->sc_dev.dv_xname)); 780 if (snprintf(vnd->sc_dev.dv_xname, sizeof(vnd->sc_dev.dv_xname), 781 "vnd%d", unit) >= sizeof(vnd->sc_dev.dv_xname)) { 782 printf("VNDIOCSET: device name too long\n"); 783 vndunlock(vnd); 784 return(ENXIO); 785 } 786 787 /* 788 * Always open for read and write. 789 * This is probably bogus, but it lets vn_open() 790 * weed out directories, sockets, etc. so we don't 791 * have to worry about them. 
792 */ 793 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, vio->vnd_file, p); 794 if ((error = vn_open(&nd, FREAD|FWRITE, 0)) != 0) { 795 vndunlock(vnd); 796 return (error); 797 } 798 error = VOP_GETATTR(nd.ni_vp, &vattr, p->p_ucred, p); 799 if (error) { 800 VOP_UNLOCK(nd.ni_vp, 0, p); 801 (void) vn_close(nd.ni_vp, FREAD|FWRITE, p->p_ucred, p); 802 vndunlock(vnd); 803 return (error); 804 } 805 VOP_UNLOCK(nd.ni_vp, 0, p); 806 vnd->sc_vp = nd.ni_vp; 807 vnd->sc_size = btodb(vattr.va_size); /* note truncation */ 808 if ((error = vndsetcred(vnd, p->p_ucred)) != 0) { 809 (void) vn_close(nd.ni_vp, FREAD|FWRITE, p->p_ucred, p); 810 vndunlock(vnd); 811 return (error); 812 } 813 814 if (vio->vnd_keylen) { 815 char *key; 816 817 key = malloc(vio->vnd_keylen, M_TEMP, M_WAITOK); 818 if ((error = copyin((caddr_t)vio->vnd_key, key, 819 vio->vnd_keylen)) != 0) { 820 (void) vn_close(nd.ni_vp, FREAD|FWRITE, 821 p->p_ucred, p); 822 vndunlock(vnd); 823 return (error); 824 } 825 826 vnd->sc_keyctx = malloc(sizeof(blf_ctx), M_DEVBUF, 827 M_WAITOK); 828 blf_key(vnd->sc_keyctx, key, vio->vnd_keylen); 829 bzero(key, vio->vnd_keylen); 830 free((caddr_t)key, M_TEMP); 831 } else 832 vnd->sc_keyctx = NULL; 833 834 vndthrottle(vnd, vnd->sc_vp); 835 vio->vnd_size = dbtob((off_t)vnd->sc_size); 836 vnd->sc_flags |= VNF_INITED; 837 #ifdef DEBUG 838 if (vnddebug & VDB_INIT) 839 printf("vndioctl: SET vp %p size %x\n", 840 vnd->sc_vp, vnd->sc_size); 841 #endif 842 843 /* Attach the disk. */ 844 vnd->sc_dk.dk_driver = &vnddkdriver; 845 vnd->sc_dk.dk_name = vnd->sc_dev.dv_xname; 846 disk_attach(&vnd->sc_dk); 847 dk_establish(&vnd->sc_dk, &vnd->sc_dev); 848 849 vndunlock(vnd); 850 851 break; 852 853 case VNDIOCCLR: 854 if ((vnd->sc_flags & VNF_INITED) == 0) 855 return (ENXIO); 856 857 if ((error = vndlock(vnd)) != 0) 858 return (error); 859 860 /* 861 * Don't unconfigure if any other partitions are open 862 * or if both the character and block flavors of this 863 * partition are open. 
864 */ 865 part = DISKPART(dev); 866 pmask = (1 << part); 867 if ((vnd->sc_dk.dk_openmask & ~pmask) || 868 ((vnd->sc_dk.dk_bopenmask & pmask) && 869 (vnd->sc_dk.dk_copenmask & pmask))) { 870 vndunlock(vnd); 871 return (EBUSY); 872 } 873 874 vndclear(vnd); 875 #ifdef DEBUG 876 if (vnddebug & VDB_INIT) 877 printf("vndioctl: CLRed\n"); 878 #endif 879 /* Free crypto key */ 880 if (vnd->sc_keyctx) { 881 bzero(vnd->sc_keyctx, vio->vnd_keylen); 882 free((caddr_t)vnd->sc_keyctx, M_DEVBUF); 883 } 884 885 /* Detatch the disk. */ 886 disk_detach(&vnd->sc_dk); 887 888 /* This must be atomic. */ 889 s = splhigh(); 890 vndunlock(vnd); 891 bzero(vnd, sizeof(struct vnd_softc)); 892 splx(s); 893 break; 894 895 case DIOCGDINFO: 896 if ((vnd->sc_flags & VNF_HAVELABEL) == 0) 897 return (ENOTTY); 898 *(struct disklabel *)addr = *(vnd->sc_dk.dk_label); 899 return (0); 900 901 case DIOCGPART: 902 if ((vnd->sc_flags & VNF_HAVELABEL) == 0) 903 return (ENOTTY); 904 ((struct partinfo *)addr)->disklab = vnd->sc_dk.dk_label; 905 ((struct partinfo *)addr)->part = 906 &vnd->sc_dk.dk_label->d_partitions[DISKPART(dev)]; 907 return (0); 908 909 case DIOCWDINFO: 910 case DIOCSDINFO: 911 if ((vnd->sc_flags & VNF_HAVELABEL) == 0) 912 return (ENOTTY); 913 if ((flag & FWRITE) == 0) 914 return (EBADF); 915 916 if ((error = vndlock(vnd)) != 0) 917 return (error); 918 vnd->sc_flags |= VNF_LABELLING; 919 920 error = setdisklabel(vnd->sc_dk.dk_label, 921 (struct disklabel *)addr, /*vnd->sc_dk.dk_openmask : */0, 922 vnd->sc_dk.dk_cpulabel); 923 if (error == 0) { 924 if (cmd == DIOCWDINFO) 925 error = writedisklabel(MAKEDISKDEV(major(dev), 926 DISKUNIT(dev), RAW_PART), 927 vndstrategy, vnd->sc_dk.dk_label, 928 vnd->sc_dk.dk_cpulabel); 929 } 930 931 vnd->sc_flags &= ~VNF_LABELLING; 932 vndunlock(vnd); 933 return (error); 934 935 case DIOCWLABEL: 936 if ((flag & FWRITE) == 0) 937 return (EBADF); 938 if (*(int *)addr) 939 vnd->sc_flags |= VNF_WLABEL; 940 else 941 vnd->sc_flags &= ~VNF_WLABEL; 942 return (0); 943 
944 default: 945 return (ENOTTY); 946 } 947 948 return (0); 949 } 950 951 /* 952 * Duplicate the current processes' credentials. Since we are called only 953 * as the result of a SET ioctl and only root can do that, any future access 954 * to this "disk" is essentially as root. Note that credentials may change 955 * if some other uid can write directly to the mapped file (NFS). 956 */ 957 int 958 vndsetcred(vnd, cred) 959 struct vnd_softc *vnd; 960 struct ucred *cred; 961 { 962 struct uio auio; 963 struct iovec aiov; 964 char *tmpbuf; 965 int error; 966 struct proc *p = curproc; 967 968 vnd->sc_cred = crdup(cred); 969 tmpbuf = malloc(DEV_BSIZE, M_TEMP, M_WAITOK); 970 971 /* XXX: Horrible kludge to establish credentials for NFS */ 972 aiov.iov_base = tmpbuf; 973 aiov.iov_len = min(DEV_BSIZE, dbtob(vnd->sc_size)); 974 auio.uio_iov = &aiov; 975 auio.uio_iovcnt = 1; 976 auio.uio_offset = 0; 977 auio.uio_rw = UIO_READ; 978 auio.uio_segflg = UIO_SYSSPACE; 979 auio.uio_resid = aiov.iov_len; 980 vn_lock(vnd->sc_vp, LK_RETRY | LK_EXCLUSIVE, p); 981 error = VOP_READ(vnd->sc_vp, &auio, 0, vnd->sc_cred); 982 VOP_UNLOCK(vnd->sc_vp, 0, p); 983 984 free(tmpbuf, M_TEMP); 985 return (error); 986 } 987 988 /* 989 * Set maxactive based on FS type 990 */ 991 void 992 vndthrottle(vnd, vp) 993 struct vnd_softc *vnd; 994 struct vnode *vp; 995 { 996 #ifdef NFSCLIENT 997 extern int (**nfsv2_vnodeop_p)(void *); 998 999 if (vp->v_op == nfsv2_vnodeop_p) 1000 vnd->sc_maxactive = 2; 1001 else 1002 #endif 1003 vnd->sc_maxactive = 8; 1004 } 1005 1006 void 1007 vndshutdown() 1008 { 1009 struct vnd_softc *vnd; 1010 1011 for (vnd = &vnd_softc[0]; vnd < &vnd_softc[numvnd]; vnd++) 1012 if (vnd->sc_flags & VNF_INITED) 1013 vndclear(vnd); 1014 } 1015 1016 void 1017 vndclear(vnd) 1018 struct vnd_softc *vnd; 1019 { 1020 struct vnode *vp = vnd->sc_vp; 1021 struct proc *p = curproc; /* XXX */ 1022 1023 #ifdef DEBUG 1024 if (vnddebug & VDB_FOLLOW) 1025 printf("vndclear(%p): vp %p\n", vnd, vp); 1026 #endif 
1027 vnd->sc_flags &= ~VNF_INITED; 1028 if (vp == (struct vnode *)0) 1029 panic("vndioctl: null vp"); 1030 (void) vn_close(vp, FREAD|FWRITE, vnd->sc_cred, p); 1031 crfree(vnd->sc_cred); 1032 vnd->sc_vp = (struct vnode *)0; 1033 vnd->sc_cred = (struct ucred *)0; 1034 vnd->sc_size = 0; 1035 } 1036 1037 int 1038 vndsize(dev) 1039 dev_t dev; 1040 { 1041 int unit = vndunit(dev); 1042 struct vnd_softc *vnd = &vnd_softc[unit]; 1043 1044 if (unit >= numvnd || (vnd->sc_flags & VNF_INITED) == 0) 1045 return (-1); 1046 return (vnd->sc_size); 1047 } 1048 1049 int 1050 vnddump(dev, blkno, va, size) 1051 dev_t dev; 1052 daddr_t blkno; 1053 caddr_t va; 1054 size_t size; 1055 { 1056 1057 /* Not implemented. */ 1058 return (ENXIO); 1059 } 1060 1061 /* 1062 * Wait interruptibly for an exclusive lock. 1063 * 1064 * XXX 1065 * Several drivers do this; it should be abstracted and made MP-safe. 1066 */ 1067 int 1068 vndlock(sc) 1069 struct vnd_softc *sc; 1070 { 1071 int error; 1072 1073 while ((sc->sc_flags & VNF_LOCKED) != 0) { 1074 sc->sc_flags |= VNF_WANTED; 1075 if ((error = tsleep(sc, PRIBIO | PCATCH, "vndlck", 0)) != 0) 1076 return (error); 1077 } 1078 sc->sc_flags |= VNF_LOCKED; 1079 return (0); 1080 } 1081 1082 /* 1083 * Unlock and wake up any waiters. 1084 */ 1085 void 1086 vndunlock(sc) 1087 struct vnd_softc *sc; 1088 { 1089 1090 sc->sc_flags &= ~VNF_LOCKED; 1091 if ((sc->sc_flags & VNF_WANTED) != 0) { 1092 sc->sc_flags &= ~VNF_WANTED; 1093 wakeup(sc); 1094 } 1095 } 1096