1 /* $OpenBSD: vnd.c,v 1.31 2001/12/19 08:58:06 art Exp $ */ 2 /* $NetBSD: vnd.c,v 1.26 1996/03/30 23:06:11 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1988 University of Utah. 6 * Copyright (c) 1990, 1993 7 * The Regents of the University of California. All rights reserved. 8 * 9 * This code is derived from software contributed to Berkeley by 10 * the Systems Programming Group of the University of Utah Computer 11 * Science Department. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. All advertising materials mentioning features or use of this software 22 * must display the following acknowledgement: 23 * This product includes software developed by the University of 24 * California, Berkeley and its contributors. 25 * 4. Neither the name of the University nor the names of its contributors 26 * may be used to endorse or promote products derived from this software 27 * without specific prior written permission. 28 * 29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 32 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 39 * SUCH DAMAGE. 40 * 41 * from: Utah $Hdr: vn.c 1.13 94/04/02$ 42 * 43 * @(#)vn.c 8.6 (Berkeley) 4/1/94 44 */ 45 46 /* 47 * Vnode disk driver. 48 * 49 * Block/character interface to a vnode. Allows one to treat a file 50 * as a disk (e.g. build a filesystem in it, mount it, etc.). 51 * 52 * NOTE 1: This uses either the VOP_BMAP/VOP_STRATEGY interface to the 53 * vnode or simple VOP_READ/VOP_WRITE. The former is suitable for swapping 54 * as it doesn't distort the local buffer cache. The latter is good for 55 * building disk images as it keeps the cache consistent after the block 56 * device is closed. 57 * 58 * NOTE 2: There is a security issue involved with this driver. 59 * Once mounted all access to the contents of the "mapped" file via 60 * the special file is controlled by the permissions on the special 61 * file, the protection of the mapped file is ignored (effectively, 62 * by using root credentials in all transactions). 63 * 64 * NOTE 3: Doesn't interact with leases, should it? 
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/proc.h>
#include <sys/errno.h>
#include <sys/buf.h>
#include <sys/malloc.h>
#include <sys/ioctl.h>
#include <sys/disklabel.h>
#include <sys/device.h>
#include <sys/disk.h>
#include <sys/stat.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/uio.h>
#include <sys/conf.h>

#include <crypto/blf.h>

#include <miscfs/specfs/specdev.h>

#include <dev/vndioctl.h>

#ifdef DEBUG
int dovndcluster = 1;       /* allow read-ahead clustering in the bmap path */
int vnddebug = 0x00;        /* bitmask of the VDB_* categories below */
#define VDB_FOLLOW  0x01
#define VDB_INIT    0x02
#define VDB_IO      0x04
#endif

/* The transfer queue is disksort()ed on b_cylin; reuse b_resid for the key. */
#define b_cylin b_resid

/*
 * The unit number lives in the low bits of the minor; bit 0x80 selects the
 * "simple" (VOP_READ/VOP_WRITE) I/O method instead of VOP_BMAP/VOP_STRATEGY.
 */
#define vndunit(x)  DISKUNIT((x) & 0x7f)
#define vndsimple(x)    ((x) & 0x80)
#define MAKEVNDDEV(maj, unit, part) MAKEDISKDEV(maj, unit, part)

#define VNDLABELDEV(dev)    (MAKEVNDDEV(major(dev), vndunit(dev), RAW_PART))

/*
 * Component buffer for the bmap path: vb_buf is handed to the underlying
 * device's strategy routine, vb_obp points back at the originating buffer
 * so vndiodone() can account completed bytes against it.
 */
struct vndbuf {
    struct buf  vb_buf;
    struct buf  *vb_obp;
};

#define getvndbuf() \
    ((struct vndbuf *)malloc(sizeof(struct vndbuf), M_DEVBUF, M_WAITOK))
#define putvndbuf(vbp)  \
    free((caddr_t)(vbp), M_DEVBUF)

/* Per-unit driver state. */
struct vnd_softc {
    struct device    sc_dev;
    struct disk  sc_dk;

    int      sc_flags;  /* flags */
    size_t       sc_size;   /* size of vnd in blocks */
    struct vnode    *sc_vp;     /* vnode */
    struct ucred    *sc_cred;   /* credentials */
    int      sc_maxactive;  /* max # of active requests */
    struct buf   sc_tab;    /* transfer queue */
    void        *sc_keyctx; /* key context (blf_ctx when encrypting) */
};

/* sc_flags */
#define VNF_ALIVE   0x0001
#define VNF_INITED  0x0002
#define VNF_WANTED  0x0040
#define VNF_LOCKED  0x0080
#define VNF_LABELLING   0x0100
#define VNF_WLABEL  0x0200
#define VNF_HAVELABEL   0x0400
#define VNF_BUSY    0x0800
#define VNF_SIMPLE  0x1000

struct vnd_softc *vnd_softc;
int numvnd = 0;

struct dkdriver vnddkdriver = { vndstrategy };

/* called by main() at boot time */
void    vndattach __P((int));

void    vndclear __P((struct vnd_softc *));
void    vndstart __P((struct vnd_softc *));
int vndsetcred __P((struct vnd_softc *, struct ucred *));
void    vndthrottle __P((struct vnd_softc *, struct vnode *));
void    vndiodone __P((struct buf *));
void    vndshutdown __P((void));
void    vndgetdisklabel __P((dev_t, struct vnd_softc *));
void    vndencrypt __P((struct vnd_softc *, caddr_t, size_t, daddr_t, int));

int vndlock __P((struct vnd_softc *));
void    vndunlock __P((struct vnd_softc *));

/*
 * Encrypt (encrypt != 0) or decrypt (encrypt == 0) `size' bytes at `addr'
 * in place, one DEV_BSIZE block at a time, using Blowfish CBC.  The IV for
 * each block is derived from its disk block number `off' (itself encrypted
 * in ECB mode to whiten it) so identical plaintext blocks at different
 * offsets do not yield identical ciphertext.
 */
void
vndencrypt(vnd, addr, size, off, encrypt)
    struct vnd_softc *vnd;
    caddr_t addr;
    size_t size;
    daddr_t off;
    int encrypt;
{
    int i, bsize;
    u_char iv[8];

    bsize = dbtob(1);
    /* NOTE(review): a trailing partial block (size % bsize) is skipped;
     * callers always pass multiples of DEV_BSIZE — confirm. */
    for (i = 0; i < size/bsize; i++) {
        bzero(iv, sizeof(iv));
        bcopy((u_char *)&off, iv, sizeof(off));
        blf_ecb_encrypt(vnd->sc_keyctx, iv, sizeof(iv));
        if (encrypt)
            blf_cbc_encrypt(vnd->sc_keyctx, iv, addr, bsize);
        else
            blf_cbc_decrypt(vnd->sc_keyctx, iv, addr, bsize);

        addr += bsize;
        off++;
    }
}

/*
 * Allocate and zero the softc array for `num' units.  Called by main() at
 * boot time; on allocation failure the driver is simply left with no units
 * (numvnd stays 0).
 */
void
vndattach(num)
    int num;
{
    char *mem;
    u_long size;

    if (num <= 0)
        return;
    size = num * sizeof(struct vnd_softc);
    mem = malloc(size, M_DEVBUF, M_NOWAIT);
    if (mem == NULL) {
        printf("WARNING: no memory for vnode disks\n");
        return;
    }
    bzero(mem, size);
    vnd_softc = (struct vnd_softc *)mem;
    numvnd = num;
}

/*
 * Open a partition of a unit.  Reads the disklabel on the first open after
 * configuration, enforces that all concurrent opens use the same I/O method
 * (simple vs. bmap), and records the open in the disk(9) open masks so the
 * unit cannot be unconfigured while in use.
 */
int
vndopen(dev, flags, mode, p)
    dev_t dev;
    int flags, mode;
    struct proc *p;
{
    int unit = vndunit(dev);
    struct vnd_softc *sc;
    int error = 0, part, pmask;

#ifdef DEBUG
    if (vnddebug & VDB_FOLLOW)
        printf("vndopen(%x, %x, %x, %p)\n", dev, flags, mode, p);
#endif
    if (unit >= numvnd)
        return (ENXIO);
    sc = &vnd_softc[unit];

    if ((error = vndlock(sc)) != 0)
        return (error);

    /* First open of a configured unit: pull in the label lazily. */
    if ((sc->sc_flags & VNF_INITED) &&
        (sc->sc_flags & VNF_HAVELABEL) == 0) {
        sc->sc_flags |= VNF_HAVELABEL;
        vndgetdisklabel(dev, sc);
    }

    part = DISKPART(dev);
    pmask = 1 << part;

    /*
     * If any partition is open, all succeeding openings must be of the
     * same type.
     */
    if (sc->sc_dk.dk_openmask) {
        if (((sc->sc_flags & VNF_SIMPLE) != 0) !=
            (vndsimple(dev) != 0)) {
            error = EBUSY;
            goto bad;
        }
    } else if (vndsimple(dev))
        sc->sc_flags |= VNF_SIMPLE;
    else
        sc->sc_flags &= ~VNF_SIMPLE;

    /* Check that the partition exists. */
    if (part != RAW_PART &&
        ((sc->sc_flags & VNF_HAVELABEL) == 0 ||
        part >= sc->sc_dk.dk_label->d_npartitions ||
        sc->sc_dk.dk_label->d_partitions[part].p_fstype == FS_UNUSED)) {
        error = ENXIO;
        goto bad;
    }

    /* Prevent our unit from being unconfigured while open. */
    switch (mode) {
    case S_IFCHR:
        sc->sc_dk.dk_copenmask |= pmask;
        break;

    case S_IFBLK:
        sc->sc_dk.dk_bopenmask |= pmask;
        break;
    }
    sc->sc_dk.dk_openmask =
        sc->sc_dk.dk_copenmask | sc->sc_dk.dk_bopenmask;

    vndunlock(sc);
    return (0);
bad:
    vndunlock(sc);
    return (error);
}

/*
 * Load the label information on the named device.
 * A fictitious default geometry is built first so that readdisklabel()
 * has something sane to work from; errors from readdisklabel() are
 * silently ignored and the fictitious label remains in effect.
 */
void
vndgetdisklabel(dev, sc)
    dev_t dev;
    struct vnd_softc *sc;
{
    struct disklabel *lp = sc->sc_dk.dk_label;
    char *errstring;

    bzero(lp, sizeof(struct disklabel));
    bzero(sc->sc_dk.dk_cpulabel, sizeof(struct cpu_disklabel));

    /* Fictitious geometry: 1 track of 100 sectors per cylinder. */
    lp->d_secsize = 512;
    lp->d_ntracks = 1;
    lp->d_nsectors = 100;
    lp->d_ncylinders = sc->sc_size / 100;
    lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
    if (lp->d_secpercyl == 0) {
        lp->d_secpercyl = 100;
        /* as long as it's not 0 - readdisklabel divides by it (?) */
    }

    strncpy(lp->d_typename, "vnd device", 16);
    lp->d_type = DTYPE_SCSI;
    strncpy(lp->d_packname, "fictitious", 16);
    lp->d_secperunit = sc->sc_size;
    lp->d_rpm = 3600;
    lp->d_interleave = 1;
    lp->d_flags = 0;

    lp->d_partitions[RAW_PART].p_offset = 0;
    lp->d_partitions[RAW_PART].p_size =
        lp->d_secperunit * (lp->d_secsize / DEV_BSIZE);
    lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
    lp->d_npartitions = RAW_PART + 1;

    lp->d_magic = DISKMAGIC;
    lp->d_magic2 = DISKMAGIC;
    lp->d_checksum = dkcksum(lp);

    /*
     * Call the generic disklabel extraction routine
     */
    errstring = readdisklabel(VNDLABELDEV(dev), vndstrategy, lp,
        sc->sc_dk.dk_cpulabel, 0);
    if (errstring) {
        /*printf("%s: %s\n", sc->sc_dev.dv_xname, errstring);*/
        return;
    }
}

/*
 * Close a partition: clear its bit from the appropriate open mask so the
 * unit becomes eligible for unconfiguration once fully closed.
 */
int
vndclose(dev, flags, mode, p)
    dev_t dev;
    int flags, mode;
    struct proc *p;
{
    int unit = vndunit(dev);
    struct vnd_softc *sc;
    int error = 0, part;

#ifdef DEBUG
    if (vnddebug & VDB_FOLLOW)
        printf("vndclose(%x, %x, %x, %p)\n", dev, flags, mode, p);
#endif

    if (unit >= numvnd)
        return (ENXIO);
    sc = &vnd_softc[unit];

    if ((error = vndlock(sc)) != 0)
        return (error);

    part = DISKPART(dev);

    /* ...that much closer to allowing unconfiguration... */
    switch (mode) {
    case S_IFCHR:
        sc->sc_dk.dk_copenmask &= ~(1 << part);
        break;

    case S_IFBLK:
        sc->sc_dk.dk_bopenmask &= ~(1 << part);
        break;
    }
    sc->sc_dk.dk_openmask =
        sc->sc_dk.dk_copenmask | sc->sc_dk.dk_bopenmask;

    vndunlock(sc);
    return (0);
}

/*
 * Two methods are used, the traditional buffercache bypassing and the
 * newer, cache-coherent on unmount, one.
 *
 * Former method:
 * Break the request into bsize pieces and submit using VOP_BMAP/VOP_STRATEGY.
381 * Note that this driver can only be used for swapping over NFS on the hp 382 * since nfs_strategy on the vax cannot handle u-areas and page tables. 383 * 384 * Latter method: 385 * Repack the buffer into an uio structure and use VOP_READ/VOP_WRITE to 386 * access the underlying file. Things are complicated by the fact that we 387 * might get recursively called due to buffer flushes. In those cases we 388 * queue one write. 389 */ 390 void 391 vndstrategy(bp) 392 struct buf *bp; 393 { 394 int unit = vndunit(bp->b_dev); 395 struct vnd_softc *vnd = &vnd_softc[unit]; 396 struct vndbuf *nbp; 397 int bsize; 398 off_t bn; 399 caddr_t addr; 400 size_t resid; 401 int sz, flags, error, s; 402 struct iovec aiov; 403 struct uio auio; 404 struct proc *p = curproc; 405 406 #ifdef DEBUG 407 if (vnddebug & VDB_FOLLOW) 408 printf("vndstrategy(%p): unit %d\n", bp, unit); 409 #endif 410 if ((vnd->sc_flags & VNF_INITED) == 0) { 411 bp->b_error = ENXIO; 412 bp->b_flags |= B_ERROR; 413 biodone(bp); 414 return; 415 } 416 417 bn = bp->b_blkno; 418 sz = howmany(bp->b_bcount, DEV_BSIZE); 419 bp->b_resid = bp->b_bcount; 420 if (bn < 0) { 421 bp->b_error = EINVAL; 422 bp->b_flags |= B_ERROR; 423 biodone(bp); 424 return; 425 } 426 if (DISKPART(bp->b_dev) != RAW_PART && 427 bounds_check_with_label(bp, vnd->sc_dk.dk_label, 428 vnd->sc_dk.dk_cpulabel, 1) == 0) { 429 biodone(bp); 430 return; 431 } 432 433 /* No bypassing of buffer cache? */ 434 if (vndsimple(bp->b_dev)) { 435 /* 436 * In order to avoid "locking against myself" panics, we 437 * must be prepared to queue operations during another I/O 438 * operation. This situation comes up where a dirty cache 439 * buffer needs to be flushed in order to provide the current 440 * operation with a fresh buffer. 441 * 442 * XXX do we really need to protect stuff relating to this with 443 * splbio? 
444 */ 445 if (vnd->sc_flags & VNF_BUSY) { 446 s = splbio(); 447 bp->b_actf = vnd->sc_tab.b_actf; 448 vnd->sc_tab.b_actf = bp; 449 vnd->sc_tab.b_active++; 450 splx(s); 451 return; 452 } 453 454 /* Loop until all queued requests are handled. */ 455 for (;;) { 456 int part = DISKPART(bp->b_dev); 457 int off = vnd->sc_dk.dk_label->d_partitions[part].p_offset; 458 459 aiov.iov_base = bp->b_data; 460 auio.uio_resid = aiov.iov_len = bp->b_bcount; 461 auio.uio_iov = &aiov; 462 auio.uio_iovcnt = 1; 463 auio.uio_offset = dbtob((off_t)(bp->b_blkno + off)); 464 auio.uio_segflg = UIO_SYSSPACE; 465 auio.uio_procp = NULL; 466 467 vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY, p); 468 vnd->sc_flags |= VNF_BUSY; 469 if (bp->b_flags & B_READ) { 470 auio.uio_rw = UIO_READ; 471 bp->b_error = VOP_READ(vnd->sc_vp, &auio, 0, 472 vnd->sc_cred); 473 if (vnd->sc_keyctx) 474 vndencrypt(vnd, bp->b_data, 475 bp->b_bcount, 476 bp->b_blkno, 0); 477 } else { 478 if (vnd->sc_keyctx) 479 vndencrypt(vnd, bp->b_data, 480 bp->b_bcount, 481 bp->b_blkno, 1); 482 auio.uio_rw = UIO_WRITE; 483 bp->b_error = VOP_WRITE(vnd->sc_vp, &auio, 0, 484 vnd->sc_cred); 485 /* Data in buffer cache needs to be in clear */ 486 if (vnd->sc_keyctx) 487 vndencrypt(vnd, bp->b_data, 488 bp->b_bcount, 489 bp->b_blkno, 0); 490 } 491 vnd->sc_flags &= ~VNF_BUSY; 492 VOP_UNLOCK(vnd->sc_vp, 0, p); 493 if (bp->b_error) 494 bp->b_flags |= B_ERROR; 495 bp->b_resid = auio.uio_resid; 496 biodone(bp); 497 498 /* If nothing more is queued, we are done. */ 499 if (!vnd->sc_tab.b_active) 500 return; 501 502 /* 503 * Dequeue now since lower level strategy 504 * routine might queue using same links. 505 */ 506 s = splbio(); 507 bp = vnd->sc_tab.b_actf; 508 vnd->sc_tab.b_actf = bp->b_actf; 509 vnd->sc_tab.b_active--; 510 splx(s); 511 } 512 } 513 514 /* The old-style buffercache bypassing method. 
*/ 515 bn += vnd->sc_dk.dk_label->d_partitions[DISKPART(bp->b_dev)].p_offset; 516 bn = dbtob(bn); 517 bsize = vnd->sc_vp->v_mount->mnt_stat.f_iosize; 518 addr = bp->b_data; 519 flags = bp->b_flags | B_CALL; 520 for (resid = bp->b_resid; resid; resid -= sz) { 521 struct vnode *vp; 522 daddr_t nbn; 523 int off, s, nra; 524 525 nra = 0; 526 vn_lock(vnd->sc_vp, LK_RETRY | LK_EXCLUSIVE, p); 527 error = VOP_BMAP(vnd->sc_vp, bn / bsize, &vp, &nbn, &nra); 528 VOP_UNLOCK(vnd->sc_vp, 0, p); 529 if (error == 0 && (long)nbn == -1) 530 error = EIO; 531 #ifdef DEBUG 532 if (!dovndcluster) 533 nra = 0; 534 #endif 535 536 if ((off = bn % bsize) != 0) 537 sz = bsize - off; 538 else 539 sz = (1 + nra) * bsize; 540 if (resid < sz) 541 sz = resid; 542 #ifdef DEBUG 543 if (vnddebug & VDB_IO) 544 printf("vndstrategy: vp %p/%p bn %x/%x sz %x\n", 545 vnd->sc_vp, vp, bn, nbn, sz); 546 #endif 547 548 nbp = getvndbuf(); 549 nbp->vb_buf.b_flags = flags; 550 nbp->vb_buf.b_bcount = sz; 551 nbp->vb_buf.b_bufsize = bp->b_bufsize; 552 nbp->vb_buf.b_error = 0; 553 if (vp->v_type == VBLK || vp->v_type == VCHR) 554 nbp->vb_buf.b_dev = vp->v_rdev; 555 else 556 nbp->vb_buf.b_dev = NODEV; 557 nbp->vb_buf.b_data = addr; 558 nbp->vb_buf.b_blkno = nbn + btodb(off); 559 nbp->vb_buf.b_proc = bp->b_proc; 560 nbp->vb_buf.b_iodone = vndiodone; 561 nbp->vb_buf.b_vp = vp; 562 nbp->vb_buf.b_dirtyoff = bp->b_dirtyoff; 563 nbp->vb_buf.b_dirtyend = bp->b_dirtyend; 564 nbp->vb_buf.b_validoff = bp->b_validoff; 565 nbp->vb_buf.b_validend = bp->b_validend; 566 LIST_INIT(&nbp->vb_buf.b_dep); 567 568 /* save a reference to the old buffer */ 569 nbp->vb_obp = bp; 570 571 /* 572 * If there was an error or a hole in the file...punt. 573 * Note that we deal with this after the nbp allocation. 574 * This ensures that we properly clean up any operations 575 * that we have already fired off. 576 * 577 * XXX we could deal with holes here but it would be 578 * a hassle (in the write case). 
579 */ 580 if (error) { 581 nbp->vb_buf.b_error = error; 582 nbp->vb_buf.b_flags |= B_ERROR; 583 bp->b_resid -= (resid - sz); 584 biodone(&nbp->vb_buf); 585 return; 586 } 587 /* 588 * Just sort by block number 589 */ 590 nbp->vb_buf.b_cylin = nbp->vb_buf.b_blkno; 591 s = splbio(); 592 disksort(&vnd->sc_tab, &nbp->vb_buf); 593 if (vnd->sc_tab.b_active < vnd->sc_maxactive) { 594 vnd->sc_tab.b_active++; 595 vndstart(vnd); 596 } 597 splx(s); 598 599 bn += sz; 600 addr += sz; 601 } 602 } 603 604 /* 605 * Feed requests sequentially. 606 * We do it this way to keep from flooding NFS servers if we are connected 607 * to an NFS file. This places the burden on the client rather than the 608 * server. 609 */ 610 void 611 vndstart(vnd) 612 struct vnd_softc *vnd; 613 { 614 struct buf *bp; 615 616 /* 617 * Dequeue now since lower level strategy routine might 618 * queue using same links 619 */ 620 bp = vnd->sc_tab.b_actf; 621 vnd->sc_tab.b_actf = bp->b_actf; 622 #ifdef DEBUG 623 if (vnddebug & VDB_IO) 624 printf("vndstart(%d): bp %p vp %p blkno %x addr %p cnt %lx\n", 625 vnd-vnd_softc, bp, bp->b_vp, bp->b_blkno, bp->b_data, 626 bp->b_bcount); 627 #endif 628 629 /* Instrumentation. 
*/ 630 disk_busy(&vnd->sc_dk); 631 632 if ((bp->b_flags & B_READ) == 0) 633 bp->b_vp->v_numoutput++; 634 VOP_STRATEGY(bp); 635 } 636 637 void 638 vndiodone(bp) 639 struct buf *bp; 640 { 641 struct vndbuf *vbp = (struct vndbuf *) bp; 642 struct buf *pbp = vbp->vb_obp; 643 struct vnd_softc *vnd = &vnd_softc[vndunit(pbp->b_dev)]; 644 long count; 645 int s; 646 647 s = splbio(); 648 #ifdef DEBUG 649 if (vnddebug & VDB_IO) 650 printf("vndiodone(%d): vbp %p vp %p blkno %x addr %p cnt %lx\n", 651 vnd-vnd_softc, vbp, vbp->vb_buf.b_vp, vbp->vb_buf.b_blkno, 652 vbp->vb_buf.b_data, vbp->vb_buf.b_bcount); 653 #endif 654 655 if (vbp->vb_buf.b_error) { 656 #ifdef DEBUG 657 if (vnddebug & VDB_IO) 658 printf("vndiodone: vbp %p error %d\n", vbp, 659 vbp->vb_buf.b_error); 660 #endif 661 pbp->b_flags |= B_ERROR; 662 pbp->b_error = biowait(&vbp->vb_buf); 663 } 664 pbp->b_resid -= vbp->vb_buf.b_bcount; 665 putvndbuf(vbp); 666 count = pbp->b_bcount - pbp->b_resid; 667 if (pbp->b_resid == 0) { 668 #ifdef DEBUG 669 if (vnddebug & VDB_IO) 670 printf("vndiodone: pbp %p iodone\n", pbp); 671 #endif 672 biodone(pbp); 673 } 674 if (vnd->sc_tab.b_active) { 675 disk_unbusy(&vnd->sc_dk, count); 676 if (vnd->sc_tab.b_actf) 677 vndstart(vnd); 678 else 679 vnd->sc_tab.b_active--; 680 } 681 splx(s); 682 } 683 684 /* ARGSUSED */ 685 int 686 vndread(dev, uio, flags) 687 dev_t dev; 688 struct uio *uio; 689 int flags; 690 { 691 int unit = vndunit(dev); 692 struct vnd_softc *sc; 693 694 #ifdef DEBUG 695 if (vnddebug & VDB_FOLLOW) 696 printf("vndread(%x, %p)\n", dev, uio); 697 #endif 698 699 if (unit >= numvnd) 700 return (ENXIO); 701 sc = &vnd_softc[unit]; 702 703 if ((sc->sc_flags & VNF_INITED) == 0) 704 return (ENXIO); 705 706 return (physio(vndstrategy, NULL, dev, B_READ, minphys, uio)); 707 } 708 709 /* ARGSUSED */ 710 int 711 vndwrite(dev, uio, flags) 712 dev_t dev; 713 struct uio *uio; 714 int flags; 715 { 716 int unit = vndunit(dev); 717 struct vnd_softc *sc; 718 719 #ifdef DEBUG 720 if (vnddebug & 
VDB_FOLLOW) 721 printf("vndwrite(%x, %p)\n", dev, uio); 722 #endif 723 724 if (unit >= numvnd) 725 return (ENXIO); 726 sc = &vnd_softc[unit]; 727 728 if ((sc->sc_flags & VNF_INITED) == 0) 729 return (ENXIO); 730 731 return (physio(vndstrategy, NULL, dev, B_WRITE, minphys, uio)); 732 } 733 734 /* ARGSUSED */ 735 int 736 vndioctl(dev, cmd, addr, flag, p) 737 dev_t dev; 738 u_long cmd; 739 caddr_t addr; 740 int flag; 741 struct proc *p; 742 { 743 int unit = vndunit(dev); 744 struct vnd_softc *vnd; 745 struct vnd_ioctl *vio; 746 struct vattr vattr; 747 struct nameidata nd; 748 int error, part, pmask, s; 749 750 #ifdef DEBUG 751 if (vnddebug & VDB_FOLLOW) 752 printf("vndioctl(%x, %lx, %p, %x, %p): unit %d\n", 753 dev, cmd, addr, flag, p, unit); 754 #endif 755 error = suser(p->p_ucred, &p->p_acflag); 756 if (error) 757 return (error); 758 if (unit >= numvnd) 759 return (ENXIO); 760 761 vnd = &vnd_softc[unit]; 762 vio = (struct vnd_ioctl *)addr; 763 switch (cmd) { 764 765 case VNDIOCSET: 766 if (vnd->sc_flags & VNF_INITED) 767 return (EBUSY); 768 if (!(vnd->sc_flags & VNF_SIMPLE) && vio->vnd_keylen) 769 return (EINVAL); 770 771 if ((error = vndlock(vnd)) != 0) 772 return (error); 773 774 /* 775 * Always open for read and write. 776 * This is probably bogus, but it lets vn_open() 777 * weed out directories, sockets, etc. so we don't 778 * have to worry about them. 
779 */ 780 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, vio->vnd_file, p); 781 if ((error = vn_open(&nd, FREAD|FWRITE, 0)) != 0) { 782 vndunlock(vnd); 783 return (error); 784 } 785 error = VOP_GETATTR(nd.ni_vp, &vattr, p->p_ucred, p); 786 if (error) { 787 VOP_UNLOCK(nd.ni_vp, 0, p); 788 (void) vn_close(nd.ni_vp, FREAD|FWRITE, p->p_ucred, p); 789 vndunlock(vnd); 790 return (error); 791 } 792 VOP_UNLOCK(nd.ni_vp, 0, p); 793 vnd->sc_vp = nd.ni_vp; 794 vnd->sc_size = btodb(vattr.va_size); /* note truncation */ 795 if ((error = vndsetcred(vnd, p->p_ucred)) != 0) { 796 (void) vn_close(nd.ni_vp, FREAD|FWRITE, p->p_ucred, p); 797 vndunlock(vnd); 798 return (error); 799 } 800 801 if (vio->vnd_keylen) { 802 char *key; 803 804 key = malloc(vio->vnd_keylen, M_TEMP, M_WAITOK); 805 if ((error = copyin((caddr_t)vio->vnd_key, key, 806 vio->vnd_keylen)) != 0) { 807 (void) vn_close(nd.ni_vp, FREAD|FWRITE, 808 p->p_ucred, p); 809 vndunlock(vnd); 810 return (error); 811 } 812 813 vnd->sc_keyctx = malloc(sizeof(blf_ctx), M_DEVBUF, 814 M_WAITOK); 815 blf_key(vnd->sc_keyctx, key, vio->vnd_keylen); 816 bzero(key, vio->vnd_keylen); 817 free((caddr_t)key, M_TEMP); 818 } else 819 vnd->sc_keyctx = NULL; 820 821 vndthrottle(vnd, vnd->sc_vp); 822 vio->vnd_size = dbtob((off_t)vnd->sc_size); 823 vnd->sc_flags |= VNF_INITED; 824 #ifdef DEBUG 825 if (vnddebug & VDB_INIT) 826 printf("vndioctl: SET vp %p size %x\n", 827 vnd->sc_vp, vnd->sc_size); 828 #endif 829 830 /* Attach the disk. 
*/ 831 bzero(vnd->sc_dev.dv_xname, sizeof(vnd->sc_dev.dv_xname)); 832 sprintf(vnd->sc_dev.dv_xname, "vnd%d", unit); 833 vnd->sc_dk.dk_driver = &vnddkdriver; 834 vnd->sc_dk.dk_name = vnd->sc_dev.dv_xname; 835 disk_attach(&vnd->sc_dk); 836 dk_establish(&vnd->sc_dk, &vnd->sc_dev); 837 838 vndunlock(vnd); 839 840 break; 841 842 case VNDIOCCLR: 843 if ((vnd->sc_flags & VNF_INITED) == 0) 844 return (ENXIO); 845 846 if ((error = vndlock(vnd)) != 0) 847 return (error); 848 849 /* 850 * Don't unconfigure if any other partitions are open 851 * or if both the character and block flavors of this 852 * partition are open. 853 */ 854 part = DISKPART(dev); 855 pmask = (1 << part); 856 if ((vnd->sc_dk.dk_openmask & ~pmask) || 857 ((vnd->sc_dk.dk_bopenmask & pmask) && 858 (vnd->sc_dk.dk_copenmask & pmask))) { 859 vndunlock(vnd); 860 return (EBUSY); 861 } 862 863 vndclear(vnd); 864 #ifdef DEBUG 865 if (vnddebug & VDB_INIT) 866 printf("vndioctl: CLRed\n"); 867 #endif 868 /* Free crypto key */ 869 if (vnd->sc_keyctx) { 870 bzero(vnd->sc_keyctx, vio->vnd_keylen); 871 free((caddr_t)vnd->sc_keyctx, M_DEVBUF); 872 } 873 874 /* Detatch the disk. */ 875 disk_detach(&vnd->sc_dk); 876 877 /* This must be atomic. 
*/ 878 s = splhigh(); 879 vndunlock(vnd); 880 bzero(vnd, sizeof(struct vnd_softc)); 881 splx(s); 882 break; 883 884 case DIOCGDINFO: 885 if ((vnd->sc_flags & VNF_HAVELABEL) == 0) 886 return (ENOTTY); 887 *(struct disklabel *)addr = *(vnd->sc_dk.dk_label); 888 return (0); 889 890 case DIOCGPART: 891 if ((vnd->sc_flags & VNF_HAVELABEL) == 0) 892 return (ENOTTY); 893 ((struct partinfo *)addr)->disklab = vnd->sc_dk.dk_label; 894 ((struct partinfo *)addr)->part = 895 &vnd->sc_dk.dk_label->d_partitions[DISKPART(dev)]; 896 return (0); 897 898 case DIOCWDINFO: 899 case DIOCSDINFO: 900 if ((vnd->sc_flags & VNF_HAVELABEL) == 0) 901 return (ENOTTY); 902 if ((flag & FWRITE) == 0) 903 return (EBADF); 904 905 if ((error = vndlock(vnd)) != 0) 906 return (error); 907 vnd->sc_flags |= VNF_LABELLING; 908 909 error = setdisklabel(vnd->sc_dk.dk_label, 910 (struct disklabel *)addr, /*vnd->sc_dk.dk_openmask : */0, 911 vnd->sc_dk.dk_cpulabel); 912 if (error == 0) { 913 if (cmd == DIOCWDINFO) 914 error = writedisklabel(MAKEDISKDEV(major(dev), 915 DISKUNIT(dev), RAW_PART), 916 vndstrategy, vnd->sc_dk.dk_label, 917 vnd->sc_dk.dk_cpulabel); 918 } 919 920 vnd->sc_flags &= ~VNF_LABELLING; 921 vndunlock(vnd); 922 return (error); 923 924 case DIOCWLABEL: 925 if ((flag & FWRITE) == 0) 926 return (EBADF); 927 if (*(int *)addr) 928 vnd->sc_flags |= VNF_WLABEL; 929 else 930 vnd->sc_flags &= ~VNF_WLABEL; 931 return (0); 932 933 default: 934 return (ENOTTY); 935 } 936 937 return (0); 938 } 939 940 /* 941 * Duplicate the current processes' credentials. Since we are called only 942 * as the result of a SET ioctl and only root can do that, any future access 943 * to this "disk" is essentially as root. Note that credentials may change 944 * if some other uid can write directly to the mapped file (NFS). 
945 */ 946 int 947 vndsetcred(vnd, cred) 948 struct vnd_softc *vnd; 949 struct ucred *cred; 950 { 951 struct uio auio; 952 struct iovec aiov; 953 char *tmpbuf; 954 int error; 955 struct proc *p = curproc; 956 957 vnd->sc_cred = crdup(cred); 958 tmpbuf = malloc(DEV_BSIZE, M_TEMP, M_WAITOK); 959 960 /* XXX: Horrible kludge to establish credentials for NFS */ 961 aiov.iov_base = tmpbuf; 962 aiov.iov_len = min(DEV_BSIZE, dbtob(vnd->sc_size)); 963 auio.uio_iov = &aiov; 964 auio.uio_iovcnt = 1; 965 auio.uio_offset = 0; 966 auio.uio_rw = UIO_READ; 967 auio.uio_segflg = UIO_SYSSPACE; 968 auio.uio_resid = aiov.iov_len; 969 vn_lock(vnd->sc_vp, LK_RETRY | LK_EXCLUSIVE, p); 970 error = VOP_READ(vnd->sc_vp, &auio, 0, vnd->sc_cred); 971 VOP_UNLOCK(vnd->sc_vp, 0, p); 972 973 free(tmpbuf, M_TEMP); 974 return (error); 975 } 976 977 /* 978 * Set maxactive based on FS type 979 */ 980 void 981 vndthrottle(vnd, vp) 982 struct vnd_softc *vnd; 983 struct vnode *vp; 984 { 985 #ifdef NFSCLIENT 986 extern int (**nfsv2_vnodeop_p) __P((void *)); 987 988 if (vp->v_op == nfsv2_vnodeop_p) 989 vnd->sc_maxactive = 2; 990 else 991 #endif 992 vnd->sc_maxactive = 8; 993 994 if (vnd->sc_maxactive < 1) 995 vnd->sc_maxactive = 1; 996 } 997 998 void 999 vndshutdown() 1000 { 1001 struct vnd_softc *vnd; 1002 1003 for (vnd = &vnd_softc[0]; vnd < &vnd_softc[numvnd]; vnd++) 1004 if (vnd->sc_flags & VNF_INITED) 1005 vndclear(vnd); 1006 } 1007 1008 void 1009 vndclear(vnd) 1010 struct vnd_softc *vnd; 1011 { 1012 struct vnode *vp = vnd->sc_vp; 1013 struct proc *p = curproc; /* XXX */ 1014 1015 #ifdef DEBUG 1016 if (vnddebug & VDB_FOLLOW) 1017 printf("vndclear(%p): vp %p\n", vnd, vp); 1018 #endif 1019 vnd->sc_flags &= ~VNF_INITED; 1020 if (vp == (struct vnode *)0) 1021 panic("vndioctl: null vp"); 1022 (void) vn_close(vp, FREAD|FWRITE, vnd->sc_cred, p); 1023 crfree(vnd->sc_cred); 1024 vnd->sc_vp = (struct vnode *)0; 1025 vnd->sc_cred = (struct ucred *)0; 1026 vnd->sc_size = 0; 1027 } 1028 1029 int 1030 vndsize(dev) 
1031 dev_t dev; 1032 { 1033 int unit = vndunit(dev); 1034 struct vnd_softc *vnd = &vnd_softc[unit]; 1035 1036 if (unit >= numvnd || (vnd->sc_flags & VNF_INITED) == 0) 1037 return (-1); 1038 return (vnd->sc_size); 1039 } 1040 1041 int 1042 vnddump(dev, blkno, va, size) 1043 dev_t dev; 1044 daddr_t blkno; 1045 caddr_t va; 1046 size_t size; 1047 { 1048 1049 /* Not implemented. */ 1050 return (ENXIO); 1051 } 1052 1053 /* 1054 * Wait interruptibly for an exclusive lock. 1055 * 1056 * XXX 1057 * Several drivers do this; it should be abstracted and made MP-safe. 1058 */ 1059 int 1060 vndlock(sc) 1061 struct vnd_softc *sc; 1062 { 1063 int error; 1064 1065 while ((sc->sc_flags & VNF_LOCKED) != 0) { 1066 sc->sc_flags |= VNF_WANTED; 1067 if ((error = tsleep(sc, PRIBIO | PCATCH, "vndlck", 0)) != 0) 1068 return (error); 1069 } 1070 sc->sc_flags |= VNF_LOCKED; 1071 return (0); 1072 } 1073 1074 /* 1075 * Unlock and wake up any waiters. 1076 */ 1077 void 1078 vndunlock(sc) 1079 struct vnd_softc *sc; 1080 { 1081 1082 sc->sc_flags &= ~VNF_LOCKED; 1083 if ((sc->sc_flags & VNF_WANTED) != 0) { 1084 sc->sc_flags &= ~VNF_WANTED; 1085 wakeup(sc); 1086 } 1087 } 1088