/*	$OpenBSD: vnd.c,v 1.95 2009/08/24 08:51:18 jasper Exp $	*/
/*	$NetBSD: vnd.c,v 1.26 1996/03/30 23:06:11 christos Exp $	*/

/*
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vn.c 1.13 94/04/02$
 *
 *	@(#)vn.c	8.6 (Berkeley) 4/1/94
 */

/*
 * Vnode disk driver.
 *
 * Block/character interface to a vnode.  Allows one to treat a file
 * as a disk (e.g. build a filesystem in it, mount it, etc.).
 *
 * NOTE 1: This uses either the VOP_BMAP/VOP_STRATEGY interface to the
 * vnode or simple VOP_READ/VOP_WRITE.  The former is suitable for swapping
 * as it doesn't distort the local buffer cache.  The latter is good for
 * building disk images as it keeps the cache consistent after the block
 * device is closed.
 *
 * NOTE 2: There is a security issue involved with this driver.
 * Once mounted all access to the contents of the "mapped" file via
 * the special file is controlled by the permissions on the special
 * file, the protection of the mapped file is ignored (effectively,
 * by using root credentials in all transactions).
 *
 * NOTE 3: Doesn't interact with leases, should it?
 */
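
/*
 * Illustrative userland usage (a sketch for orientation, not taken from
 * this file; the exact vnconfig(8)/disklabel(8) invocations are assumptions
 * and may differ between releases):
 *
 *	vnconfig svnd0 /var/tmp/diskimage	# attach image (VNDIOCSET)
 *	disklabel -E svnd0			# label the fictitious disk
 *	newfs /dev/rsvnd0a
 *	mount /dev/svnd0a /mnt
 *	...
 *	umount /mnt
 *	vnconfig -u svnd0			# detach (VNDIOCCLR)
 */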

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/proc.h>
#include <sys/errno.h>
#include <sys/buf.h>
#include <sys/malloc.h>
#include <sys/pool.h>
#include <sys/ioctl.h>
#include <sys/disklabel.h>
#include <sys/device.h>
#include <sys/disk.h>
#include <sys/stat.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/rwlock.h>
#include <sys/uio.h>
#include <sys/conf.h>

#include <crypto/blf.h>

#include <miscfs/specfs/specdev.h>

#include <dev/vndioctl.h>

#ifdef VNDDEBUG
int dovndcluster = 1;
int vnddebug = 0x00;
#define	VDB_FOLLOW	0x01
#define	VDB_INIT	0x02
#define	VDB_IO		0x04
#define	DNPRINTF(f, p...)	do { if ((f) & vnddebug) printf(p); } while (0)
#else
#define	DNPRINTF(f, p...)	/* nothing */
#endif	/* VNDDEBUG */

/*
 * vndunit is a bit weird. have to reconstitute the dev_t for
 * DISKUNIT(), but with the minor masked off.
 */
#define	vndunit(x)	DISKUNIT(makedev(major(x), minor(x) & 0x7ff))
#define	vndsimple(x)	(minor(x) & 0x800)

/* same as MAKEDISKDEV, preserving the vndsimple() property */
#define	VNDLABELDEV(dev)	\
	makedev(major(dev), DISKMINOR(vndunit(dev), RAW_PART) | \
	    (vndsimple(dev) ? 0x800 : 0))

struct vndbuf {
	struct buf	vb_buf;
	struct buf	*vb_obp;
};

/*
 * struct vndbuf allocator
 */
struct pool	vndbufpl;

#define	getvndbuf()	pool_get(&vndbufpl, PR_WAITOK)
#define	putvndbuf(vbp)	pool_put(&vndbufpl, vbp);

struct vnd_softc {
	struct device	 sc_dev;
	struct disk	 sc_dk;

	char		 sc_file[VNDNLEN];	/* file we're covering */
	int		 sc_flags;		/* flags */
	size_t		 sc_size;		/* size of vnd in sectors */
	size_t		 sc_secsize;		/* sector size in bytes */
	size_t		 sc_nsectors;		/* # of sectors per track */
	size_t		 sc_ntracks;		/* # of tracks per cylinder */
	struct vnode	*sc_vp;			/* vnode */
	struct ucred	*sc_cred;		/* credentials */
	struct buf	 sc_tab;		/* transfer queue */
	blf_ctx		*sc_keyctx;		/* key context */
	struct rwlock	 sc_rwlock;
};

/* sc_flags */
#define	VNF_ALIVE	0x0001
#define	VNF_INITED	0x0002
#define	VNF_LABELLING	0x0100
#define	VNF_WLABEL	0x0200
#define	VNF_HAVELABEL	0x0400
#define	VNF_SIMPLE	0x1000
#define	VNF_READONLY	0x2000

#define	VNDRW(v)	((v)->sc_flags & VNF_READONLY ? FREAD : FREAD|FWRITE)
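
/*
 * One softc per unit.  The array below is allocated and sized at boot by
 * vndattach() (called from main(), see below) with the configured unit
 * count.  (Descriptive note added for orientation; the count coming from
 * the kernel's "pseudo-device vnd" line is the usual pseudo-device
 * convention rather than something this file spells out.)
 */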

struct vnd_softc *vnd_softc;
int numvnd = 0;

struct dkdriver vnddkdriver = { vndstrategy };

/* called by main() at boot time */
void	vndattach(int);

void	vndclear(struct vnd_softc *);
void	vndstart(struct vnd_softc *);
int	vndsetcred(struct vnd_softc *, struct ucred *);
void	vndiodone(struct buf *);
void	vndshutdown(void);
int	vndgetdisklabel(dev_t, struct vnd_softc *, struct disklabel *, int);
void	vndencrypt(struct vnd_softc *, caddr_t, size_t, daddr64_t, int);
size_t	vndbdevsize(struct vnode *, struct proc *);

#define	vndlock(sc)	rw_enter(&sc->sc_rwlock, RW_WRITE|RW_INTR)
#define	vndunlock(sc)	rw_exit_write(&sc->sc_rwlock)

void
vndencrypt(struct vnd_softc *vnd, caddr_t addr, size_t size, daddr64_t off,
    int encrypt)
{
	int i, bsize;
	u_char iv[8];

	bsize = dbtob(1);
	for (i = 0; i < size/bsize; i++) {
		bzero(iv, sizeof(iv));
		bcopy((u_char *)&off, iv, sizeof(off));
		blf_ecb_encrypt(vnd->sc_keyctx, iv, sizeof(iv));
		if (encrypt)
			blf_cbc_encrypt(vnd->sc_keyctx, iv, addr, bsize);
		else
			blf_cbc_decrypt(vnd->sc_keyctx, iv, addr, bsize);

		addr += bsize;
		off++;
	}
}

void
vndattach(int num)
{
	char *mem;
	u_long size;
	int i;

	if (num <= 0)
		return;
	size = num * sizeof(struct vnd_softc);
	mem = malloc(size, M_DEVBUF, M_NOWAIT | M_ZERO);
	if (mem == NULL) {
		printf("WARNING: no memory for vnode disks\n");
		return;
	}
	vnd_softc = (struct vnd_softc *)mem;
	for (i = 0; i < num; i++) {
		rw_init(&vnd_softc[i].sc_rwlock, "vndlock");
	}
	numvnd = num;

	pool_init(&vndbufpl, sizeof(struct vndbuf), 0, 0, 0, "vndbufpl", NULL);
	pool_setlowat(&vndbufpl, 16);
	pool_sethiwat(&vndbufpl, 1024);
}

int
vndopen(dev_t dev, int flags, int mode, struct proc *p)
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;
	int error = 0, part, pmask;

	DNPRINTF(VDB_FOLLOW, "vndopen(%x, %x, %x, %p)\n", dev, flags, mode, p);

	if (unit >= numvnd)
		return (ENXIO);
	sc = &vnd_softc[unit];

	if ((error = vndlock(sc)) != 0)
		return (error);

	if (!vndsimple(dev) && sc->sc_vp != NULL &&
	    (sc->sc_vp->v_type != VREG || sc->sc_keyctx != NULL)) {
		error = EINVAL;
		goto bad;
	}

	if ((flags & FWRITE) && (sc->sc_flags & VNF_READONLY)) {
		error = EROFS;
		goto bad;
	}

	if ((sc->sc_flags & VNF_INITED) &&
	    (sc->sc_flags & VNF_HAVELABEL) == 0) {
		sc->sc_flags |= VNF_HAVELABEL;
		vndgetdisklabel(dev, sc, sc->sc_dk.dk_label, 0);
	}

	part = DISKPART(dev);
	pmask = 1 << part;

	/*
	 * If any partition is open, all succeeding openings must be of the
	 * same type or read-only.
	 */
	if (sc->sc_dk.dk_openmask) {
		if (((sc->sc_flags & VNF_SIMPLE) != 0) !=
		    (vndsimple(dev) != 0) && (flags & FWRITE)) {
			error = EBUSY;
			goto bad;
		}
	} else if (vndsimple(dev))
		sc->sc_flags |= VNF_SIMPLE;
	else
		sc->sc_flags &= ~VNF_SIMPLE;

	/* Check that the partition exists. */
	if (part != RAW_PART &&
	    ((sc->sc_flags & VNF_HAVELABEL) == 0 ||
	    part >= sc->sc_dk.dk_label->d_npartitions ||
	    sc->sc_dk.dk_label->d_partitions[part].p_fstype == FS_UNUSED)) {
		error = ENXIO;
		goto bad;
	}

	/* Prevent our unit from being unconfigured while open. */
	switch (mode) {
	case S_IFCHR:
		sc->sc_dk.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		sc->sc_dk.dk_bopenmask |= pmask;
		break;
	}
	sc->sc_dk.dk_openmask =
	    sc->sc_dk.dk_copenmask | sc->sc_dk.dk_bopenmask;

	error = 0;
bad:
	vndunlock(sc);
	return (error);
}

/*
 * Load the label information on the named device
 */
int
vndgetdisklabel(dev_t dev, struct vnd_softc *sc, struct disklabel *lp,
    int spoofonly)
{
	bzero(lp, sizeof(struct disklabel));

	lp->d_secsize = sc->sc_secsize;
	lp->d_nsectors = sc->sc_nsectors;
	lp->d_ntracks = sc->sc_ntracks;
	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
	lp->d_ncylinders = sc->sc_size / lp->d_secpercyl;

	strncpy(lp->d_typename, "vnd device", sizeof(lp->d_typename));
	lp->d_type = DTYPE_VND;
	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
	DL_SETDSIZE(lp, sc->sc_size);
	lp->d_rpm = 3600;
	lp->d_interleave = 1;
	lp->d_flags = 0;
	lp->d_version = 1;

	lp->d_magic = DISKMAGIC;
	lp->d_magic2 = DISKMAGIC;
	lp->d_checksum = dkcksum(lp);

	/* Call the generic disklabel extraction routine */
	return readdisklabel(VNDLABELDEV(dev), vndstrategy, lp, spoofonly);
}

int
vndclose(dev_t dev, int flags, int mode, struct proc *p)
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;
	int error = 0, part;

	DNPRINTF(VDB_FOLLOW, "vndclose(%x, %x, %x, %p)\n", dev, flags, mode, p);

	if (unit >= numvnd)
		return (ENXIO);
	sc = &vnd_softc[unit];

	if ((error = vndlock(sc)) != 0)
		return (error);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (mode) {
	case S_IFCHR:
		sc->sc_dk.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		sc->sc_dk.dk_bopenmask &= ~(1 << part);
		break;
	}
	sc->sc_dk.dk_openmask =
	    sc->sc_dk.dk_copenmask | sc->sc_dk.dk_bopenmask;

	vndunlock(sc);
	return (0);
}

/*
 * Two methods are used: the traditional one that bypasses the buffer cache,
 * and the newer one that keeps the cache coherent after unmount.
 *
 * Former method:
 * Break the request into bsize pieces and submit using VOP_BMAP/VOP_STRATEGY.
 * Note that this driver can only be used for swapping over NFS on the hp
 * since nfs_strategy on the vax cannot handle u-areas and page tables.
 *
 * Latter method:
 * Repack the buffer into an uio structure and use VOP_READ/VOP_WRITE to
 * access the underlying file.
 */
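
/*
 * Which method applies is chosen per device node via vndsimple(): minors
 * with the 0x800 bit set (traditionally the "svnd" nodes) take the
 * cache-coherent VOP_READ/VOP_WRITE path below and are the only flavor
 * that accepts a blf encryption key; minors without the bit take the
 * VOP_BMAP/VOP_STRATEGY path.  (The "svnd" naming is the usual device-node
 * convention, noted here for orientation; this file only sees the minor
 * bit.)
 */
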
void
vndstrategy(struct buf *bp)
{
	int unit = vndunit(bp->b_dev);
	struct vnd_softc *vnd = &vnd_softc[unit];
	struct vndbuf *nbp;
	int bsize;
	off_t bn;
	caddr_t addr;
	size_t resid;
	int sz, flags, error, s;
	struct iovec aiov;
	struct uio auio;
	struct proc *p = curproc;

	DNPRINTF(VDB_FOLLOW, "vndstrategy(%p): unit %d\n", bp, unit);

	if ((vnd->sc_flags & VNF_INITED) == 0) {
		bp->b_error = ENXIO;
		bp->b_flags |= B_ERROR;
		s = splbio();
		biodone(bp);
		splx(s);
		return;
	}

	/* Ensure that the requested block is sector aligned. */
	if (bp->b_blkno % DL_BLKSPERSEC(vnd->sc_dk.dk_label) != 0) {
		bp->b_error = EINVAL;
		bp->b_flags |= B_ERROR;
		s = splbio();
		biodone(bp);
		splx(s);
		return;
	}

	bn = bp->b_blkno;
	bp->b_resid = bp->b_bcount;

	if (bn < 0) {
		bp->b_error = EINVAL;
		bp->b_flags |= B_ERROR;
		s = splbio();
		biodone(bp);
		splx(s);
		return;
	}

	/* If we have a label, do a boundary check. */
	if (vnd->sc_flags & VNF_HAVELABEL) {
		if (bounds_check_with_label(bp, vnd->sc_dk.dk_label, 1) <= 0) {
			s = splbio();
			biodone(bp);
			splx(s);
			return;
		}

		/*
		 * bounds_check_with_label() changes bp->b_resid, reset it
		 */
		bp->b_resid = bp->b_bcount;
	}

	if (vnd->sc_flags & VNF_HAVELABEL)
		sz = howmany(bp->b_bcount, vnd->sc_dk.dk_label->d_secsize);
	else
		sz = howmany(bp->b_bcount, DEV_BSIZE);

	/* No bypassing of buffer cache? */
	if (vndsimple(bp->b_dev)) {
		/* Loop until all queued requests are handled. */
		for (;;) {
			int part = DISKPART(bp->b_dev);
			daddr64_t off = DL_SECTOBLK(vnd->sc_dk.dk_label,
			    DL_GETPOFFSET(&vnd->sc_dk.dk_label->d_partitions[part]));
			aiov.iov_base = bp->b_data;
			auio.uio_resid = aiov.iov_len = bp->b_bcount;
			auio.uio_iov = &aiov;
			auio.uio_iovcnt = 1;
			auio.uio_offset = dbtob((off_t)(bp->b_blkno + off));
			auio.uio_segflg = UIO_SYSSPACE;
			auio.uio_procp = p;

			vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY, p);
			if (bp->b_flags & B_READ) {
				auio.uio_rw = UIO_READ;
				bp->b_error = VOP_READ(vnd->sc_vp, &auio, 0,
				    vnd->sc_cred);
				if (vnd->sc_keyctx)
					vndencrypt(vnd, bp->b_data,
					    bp->b_bcount, bp->b_blkno, 0);
			} else {
				if (vnd->sc_keyctx)
					vndencrypt(vnd, bp->b_data,
					    bp->b_bcount, bp->b_blkno, 1);
				auio.uio_rw = UIO_WRITE;
				/*
				 * Upper layer has already checked I/O for
				 * limits, so there is no need to do it again.
				 */
				bp->b_error = VOP_WRITE(vnd->sc_vp, &auio,
				    IO_NOLIMIT, vnd->sc_cred);
				/* Data in buffer cache needs to be in clear */
				if (vnd->sc_keyctx)
					vndencrypt(vnd, bp->b_data,
					    bp->b_bcount, bp->b_blkno, 0);
			}
			VOP_UNLOCK(vnd->sc_vp, 0, p);
			if (bp->b_error)
				bp->b_flags |= B_ERROR;
			bp->b_resid = auio.uio_resid;
			s = splbio();
			biodone(bp);
			splx(s);

			/* If nothing more is queued, we are done. */
			if (!vnd->sc_tab.b_active)
				return;

			/*
			 * Dequeue now since lower level strategy
			 * routine might queue using same links.
			 */
			s = splbio();
			bp = vnd->sc_tab.b_actf;
			vnd->sc_tab.b_actf = bp->b_actf;
			vnd->sc_tab.b_active--;
			splx(s);
		}
	}

	if (vnd->sc_vp->v_type != VREG || vnd->sc_keyctx != NULL) {
		bp->b_error = EINVAL;
		bp->b_flags |= B_ERROR;
		s = splbio();
		biodone(bp);
		splx(s);
		return;
	}

	/* The old-style buffercache bypassing method. */
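	/*
	 * The loop below maps the request onto the underlying file one
	 * filesystem block (f_iosize) at a time with VOP_BMAP and fires a
	 * child buffer at the backing device for each mapped piece: a piece
	 * ends at the next block boundary, or after 1 + nra whole blocks
	 * when BMAP reports nra contiguous blocks of read-ahead.  For
	 * example (illustrative numbers only), a 64KB request over a
	 * filesystem with 16KB blocks becomes four children, or a single
	 * child if BMAP reports all four blocks contiguous.  The parent
	 * buffer is finished in vndiodone() once every child completes.
	 */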
	bn += DL_SECTOBLK(vnd->sc_dk.dk_label,
	    DL_GETPOFFSET(&vnd->sc_dk.dk_label->d_partitions[DISKPART(bp->b_dev)]));
	bn = dbtob(bn);
	bsize = vnd->sc_vp->v_mount->mnt_stat.f_iosize;
	addr = bp->b_data;
	flags = bp->b_flags | B_CALL;
	for (resid = bp->b_resid; resid; resid -= sz) {
		struct vnode *vp;
		daddr64_t nbn;
		int off, nra;

		nra = 0;
		vn_lock(vnd->sc_vp, LK_RETRY | LK_EXCLUSIVE, p);
		error = VOP_BMAP(vnd->sc_vp, bn / bsize, &vp, &nbn, &nra);
		VOP_UNLOCK(vnd->sc_vp, 0, p);
		if (error == 0 && (long)nbn == -1)
			error = EIO;
#ifdef VNDDEBUG
		if (!dovndcluster)
			nra = 0;
#endif

		if ((off = bn % bsize) != 0)
			sz = bsize - off;
		else
			sz = (1 + nra) * bsize;
		if (resid < sz)
			sz = resid;

		DNPRINTF(VDB_IO, "vndstrategy: vp %p/%p bn %x/%lld sz %x\n",
		    vnd->sc_vp, vp, bn, nbn, sz);

		s = splbio();
		nbp = getvndbuf();
		splx(s);
		nbp->vb_buf.b_flags = flags;
		nbp->vb_buf.b_bcount = sz;
		nbp->vb_buf.b_bufsize = bp->b_bufsize;
		nbp->vb_buf.b_error = 0;
		if (vp->v_type == VBLK || vp->v_type == VCHR)
			nbp->vb_buf.b_dev = vp->v_rdev;
		else
			nbp->vb_buf.b_dev = NODEV;
		nbp->vb_buf.b_data = addr;
		nbp->vb_buf.b_blkno = nbn + btodb(off);
		nbp->vb_buf.b_proc = bp->b_proc;
		nbp->vb_buf.b_iodone = vndiodone;
		nbp->vb_buf.b_vp = vp;
		nbp->vb_buf.b_dirtyoff = bp->b_dirtyoff;
		nbp->vb_buf.b_dirtyend = bp->b_dirtyend;
		nbp->vb_buf.b_validoff = bp->b_validoff;
		nbp->vb_buf.b_validend = bp->b_validend;
		LIST_INIT(&nbp->vb_buf.b_dep);

		/* save a reference to the old buffer */
		nbp->vb_obp = bp;

		/*
		 * If there was an error or a hole in the file...punt.
		 * Note that we deal with this after the nbp allocation.
		 * This ensures that we properly clean up any operations
		 * that we have already fired off.
		 *
		 * XXX we could deal with holes here but it would be
		 * a hassle (in the write case).
		 * We must still however charge for the write even if there
		 * was an error.
		 */
		if (error) {
			nbp->vb_buf.b_error = error;
			nbp->vb_buf.b_flags |= B_ERROR;
			bp->b_resid -= (resid - sz);
			s = splbio();
			/* charge for the write */
			if ((nbp->vb_buf.b_flags & B_READ) == 0)
				nbp->vb_buf.b_vp->v_numoutput++;
			biodone(&nbp->vb_buf);
			splx(s);
			return;
		}
		/*
		 * Just sort by block number
		 */
		nbp->vb_buf.b_cylinder = nbp->vb_buf.b_blkno;
		s = splbio();
		disksort(&vnd->sc_tab, &nbp->vb_buf);
		vnd->sc_tab.b_active++;
		vndstart(vnd);
		splx(s);
		bn += sz;
		addr += sz;
	}
}

/*
 * Feed requests sequentially.
 * We do it this way to keep from flooding NFS servers if we are connected
 * to an NFS file. This places the burden on the client rather than the
 * server.
 */
void
vndstart(struct vnd_softc *vnd)
{
	struct buf *bp;

	/*
	 * Dequeue now since lower level strategy routine might
	 * queue using same links
	 */
	bp = vnd->sc_tab.b_actf;
	vnd->sc_tab.b_actf = bp->b_actf;

	DNPRINTF(VDB_IO,
	    "vndstart(%d): bp %p vp %p blkno %lld addr %p cnt %lx\n",
	    vnd-vnd_softc, bp, bp->b_vp, bp->b_blkno, bp->b_data,
	    bp->b_bcount);

	/* Instrumentation. */
	disk_busy(&vnd->sc_dk);

	if ((bp->b_flags & B_READ) == 0)
		bp->b_vp->v_numoutput++;
	VOP_STRATEGY(bp);
}

void
vndiodone(struct buf *bp)
{
	struct vndbuf *vbp = (struct vndbuf *)bp;
	struct buf *pbp = vbp->vb_obp;
	struct vnd_softc *vnd = &vnd_softc[vndunit(pbp->b_dev)];

	splassert(IPL_BIO);

	DNPRINTF(VDB_IO,
	    "vndiodone(%d): vbp %p vp %p blkno %lld addr %p cnt %lx\n",
	    vnd-vnd_softc, vbp, vbp->vb_buf.b_vp, vbp->vb_buf.b_blkno,
	    vbp->vb_buf.b_data, vbp->vb_buf.b_bcount);

	if (vbp->vb_buf.b_error) {
		DNPRINTF(VDB_IO, "vndiodone: vbp %p error %d\n", vbp,
		    vbp->vb_buf.b_error);

		pbp->b_flags |= B_ERROR;
		/* XXX does this matter here? */
		(&vbp->vb_buf)->b_flags |= B_RAW;
		pbp->b_error = biowait(&vbp->vb_buf);
	}
	pbp->b_resid -= vbp->vb_buf.b_bcount;
	putvndbuf(vbp);
	if (vnd->sc_tab.b_active) {
		disk_unbusy(&vnd->sc_dk, (pbp->b_bcount - pbp->b_resid),
		    (pbp->b_flags & B_READ));
		if (!vnd->sc_tab.b_actf)
			vnd->sc_tab.b_active--;
	}
	if (pbp->b_resid == 0) {
		DNPRINTF(VDB_IO, "vndiodone: pbp %p iodone\n", pbp);
		biodone(pbp);
	}

}

/* ARGSUSED */
int
vndread(dev_t dev, struct uio *uio, int flags)
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;

	DNPRINTF(VDB_FOLLOW, "vndread(%x, %p)\n", dev, uio);

	if (unit >= numvnd)
		return (ENXIO);
	sc = &vnd_softc[unit];

	if ((sc->sc_flags & VNF_INITED) == 0)
		return (ENXIO);

	return (physio(vndstrategy, NULL, dev, B_READ, minphys, uio));
}

/* ARGSUSED */
int
vndwrite(dev_t dev, struct uio *uio, int flags)
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;

	DNPRINTF(VDB_FOLLOW, "vndwrite(%x, %p)\n", dev, uio);

	if (unit >= numvnd)
		return (ENXIO);
	sc = &vnd_softc[unit];

	if ((sc->sc_flags & VNF_INITED) == 0)
		return (ENXIO);

	return (physio(vndstrategy, NULL, dev, B_WRITE, minphys, uio));
}

size_t
vndbdevsize(struct vnode *vp, struct proc *p)
{
	struct partinfo pi;
	struct bdevsw *bsw;
	dev_t dev;

	dev = vp->v_rdev;
	bsw = bdevsw_lookup(dev);
	if (bsw->d_ioctl == NULL)
		return (0);
	if (bsw->d_ioctl(dev, DIOCGPART, (caddr_t)&pi, FREAD, p))
		return (0);
	DNPRINTF(VDB_INIT, "vndbdevsize: size %li secsize %li\n",
	    (long)pi.part->p_size,(long)pi.disklab->d_secsize);
	return (pi.part->p_size);
}

/* ARGSUSED */
int
vndioctl(dev_t dev, u_long cmd, caddr_t addr, int flag, struct proc *p)
{
	int unit = vndunit(dev);
	struct disklabel *lp;
	struct vnd_softc *vnd;
	struct vnd_ioctl *vio;
	struct vnd_user *vnu;
	struct vattr vattr;
	struct nameidata nd;
	int error, part, pmask, s;

	DNPRINTF(VDB_FOLLOW, "vndioctl(%x, %lx, %p, %x, %p): unit %d\n",
	    dev, cmd, addr, flag, p, unit);

	error = suser(p, 0);
	if (error)
		return (error);
	if (unit >= numvnd)
		return (ENXIO);

	vnd = &vnd_softc[unit];
	vio = (struct vnd_ioctl *)addr;
	switch (cmd) {

	case VNDIOCSET:
		if (vnd->sc_flags & VNF_INITED)
			return (EBUSY);
		if (!(vnd->sc_flags & VNF_SIMPLE) && vio->vnd_keylen)
			return (EINVAL);

		if ((error = vndlock(vnd)) != 0)
			return (error);

		if ((error = copyinstr(vio->vnd_file, vnd->sc_file,
		    sizeof(vnd->sc_file), NULL))) {
			vndunlock(vnd);
			return (error);
		}

		bzero(vnd->sc_dev.dv_xname, sizeof(vnd->sc_dev.dv_xname));
		if (snprintf(vnd->sc_dev.dv_xname, sizeof(vnd->sc_dev.dv_xname),
		    "vnd%d", unit) >= sizeof(vnd->sc_dev.dv_xname)) {
			printf("VNDIOCSET: device name too long\n");
			vndunlock(vnd);
			return(ENXIO);
		}

		/* Set geometry for device. */
		vnd->sc_secsize = vio->vnd_secsize;
		vnd->sc_ntracks = vio->vnd_ntracks;
		vnd->sc_nsectors = vio->vnd_nsectors;

		/*
		 * Open for read and write first. This lets vn_open() weed out
		 * directories, sockets, etc. so we don't have to worry about
		 * them.
		 */
		NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, vio->vnd_file, p);
		vnd->sc_flags &= ~VNF_READONLY;
		error = vn_open(&nd, FREAD|FWRITE, 0);
		if (error == EROFS) {
			vnd->sc_flags |= VNF_READONLY;
			error = vn_open(&nd, FREAD, 0);
		}
		if (error) {
			vndunlock(vnd);
			return (error);
		}

		if (nd.ni_vp->v_type != VREG && !vndsimple(dev)) {
			VOP_UNLOCK(nd.ni_vp, 0, p);
			vn_close(nd.ni_vp, VNDRW(vnd), p->p_ucred, p);
			vndunlock(vnd);
			return (EINVAL);
		}

		if (nd.ni_vp->v_type == VBLK)
			vnd->sc_size = vndbdevsize(nd.ni_vp, p);
		else {
			error = VOP_GETATTR(nd.ni_vp, &vattr, p->p_ucred, p);
			if (error) {
				VOP_UNLOCK(nd.ni_vp, 0, p);
				vn_close(nd.ni_vp, VNDRW(vnd), p->p_ucred, p);
				vndunlock(vnd);
				return (error);
			}
			vnd->sc_size = vattr.va_size / vnd->sc_secsize;
		}
		VOP_UNLOCK(nd.ni_vp, 0, p);
		vnd->sc_vp = nd.ni_vp;
		if ((error = vndsetcred(vnd, p->p_ucred)) != 0) {
			(void) vn_close(nd.ni_vp, VNDRW(vnd), p->p_ucred, p);
			vndunlock(vnd);
			return (error);
		}

		if (vio->vnd_keylen > 0) {
			char key[BLF_MAXUTILIZED];

			if (vio->vnd_keylen > sizeof(key))
				vio->vnd_keylen = sizeof(key);

			if ((error = copyin(vio->vnd_key, key,
			    vio->vnd_keylen)) != 0) {
				(void) vn_close(nd.ni_vp, VNDRW(vnd),
				    p->p_ucred, p);
				vndunlock(vnd);
				return (error);
			}

			vnd->sc_keyctx = malloc(sizeof(*vnd->sc_keyctx), M_DEVBUF,
			    M_WAITOK);
			blf_key(vnd->sc_keyctx, key, vio->vnd_keylen);
			bzero(key, vio->vnd_keylen);
		} else
			vnd->sc_keyctx = NULL;

		vio->vnd_size = vnd->sc_size * vnd->sc_secsize;
		vnd->sc_flags |= VNF_INITED;

		DNPRINTF(VDB_INIT, "vndioctl: SET vp %p size %llx\n",
		    vnd->sc_vp, (unsigned long long)vnd->sc_size);

		/* Attach the disk. */
		vnd->sc_dk.dk_driver = &vnddkdriver;
		vnd->sc_dk.dk_name = vnd->sc_dev.dv_xname;
		disk_attach(&vnd->sc_dk);

		vndunlock(vnd);

		break;

	case VNDIOCCLR:
		if ((vnd->sc_flags & VNF_INITED) == 0)
			return (ENXIO);

		if ((error = vndlock(vnd)) != 0)
			return (error);

		/*
		 * Don't unconfigure if any other partitions are open
		 * or if both the character and block flavors of this
		 * partition are open.
		 */
		part = DISKPART(dev);
		pmask = (1 << part);
		if ((vnd->sc_dk.dk_openmask & ~pmask) ||
		    ((vnd->sc_dk.dk_bopenmask & pmask) &&
		    (vnd->sc_dk.dk_copenmask & pmask))) {
			vndunlock(vnd);
			return (EBUSY);
		}

		vndclear(vnd);
		DNPRINTF(VDB_INIT, "vndioctl: CLRed\n");

		/* Free crypto key */
		if (vnd->sc_keyctx) {
			bzero(vnd->sc_keyctx, sizeof(*vnd->sc_keyctx));
			free(vnd->sc_keyctx, M_DEVBUF);
		}

		/* Detach the disk. */
		disk_detach(&vnd->sc_dk);

		/* This must be atomic. */
		s = splhigh();
		vndunlock(vnd);
		bzero(vnd, sizeof(struct vnd_softc));
		splx(s);
		break;

	case VNDIOCGET:
		vnu = (struct vnd_user *)addr;

		if (vnu->vnu_unit == -1)
			vnu->vnu_unit = unit;
		if (vnu->vnu_unit >= numvnd)
			return (ENXIO);
		if (vnu->vnu_unit < 0)
			return (EINVAL);

		vnd = &vnd_softc[vnu->vnu_unit];

		if (vnd->sc_flags & VNF_INITED) {
			error = VOP_GETATTR(vnd->sc_vp, &vattr, p->p_ucred, p);
			if (error)
				return (error);

			strlcpy(vnu->vnu_file, vnd->sc_file,
			    sizeof(vnu->vnu_file));
			vnu->vnu_dev = vattr.va_fsid;
			vnu->vnu_ino = vattr.va_fileid;
		} else {
			vnu->vnu_dev = 0;
			vnu->vnu_ino = 0;
		}

		break;

	case DIOCRLDINFO:
		if ((vnd->sc_flags & VNF_HAVELABEL) == 0)
			return (ENOTTY);
		lp = malloc(sizeof(*lp), M_TEMP, M_WAITOK);
		vndgetdisklabel(dev, vnd, lp, 0);
		*(vnd->sc_dk.dk_label) = *lp;
		free(lp, M_TEMP);
		return (0);

	case DIOCGPDINFO:
		if ((vnd->sc_flags & VNF_HAVELABEL) == 0)
			return (ENOTTY);
		vndgetdisklabel(dev, vnd, (struct disklabel *)addr, 1);
		return (0);

	case DIOCGDINFO:
		if ((vnd->sc_flags & VNF_HAVELABEL) == 0)
			return (ENOTTY);
		*(struct disklabel *)addr = *(vnd->sc_dk.dk_label);
		return (0);

	case DIOCGPART:
		if ((vnd->sc_flags & VNF_HAVELABEL) == 0)
			return (ENOTTY);
		((struct partinfo *)addr)->disklab = vnd->sc_dk.dk_label;
		((struct partinfo *)addr)->part =
		    &vnd->sc_dk.dk_label->d_partitions[DISKPART(dev)];
		return (0);

	case DIOCWDINFO:
	case DIOCSDINFO:
		if ((vnd->sc_flags & VNF_HAVELABEL) == 0)
			return (ENOTTY);
		if ((flag & FWRITE) == 0)
			return (EBADF);

		if ((error = vndlock(vnd)) != 0)
			return (error);
		vnd->sc_flags |= VNF_LABELLING;

		error = setdisklabel(vnd->sc_dk.dk_label,
		    (struct disklabel *)addr, /*vnd->sc_dk.dk_openmask : */0);
		if (error == 0) {
			if (cmd == DIOCWDINFO)
				error = writedisklabel(VNDLABELDEV(dev),
				    vndstrategy, vnd->sc_dk.dk_label);
		}

		vnd->sc_flags &= ~VNF_LABELLING;
		vndunlock(vnd);
		return (error);

	case DIOCWLABEL:
		if ((flag & FWRITE) == 0)
			return (EBADF);
		if (*(int *)addr)
			vnd->sc_flags |= VNF_WLABEL;
		else
			vnd->sc_flags &= ~VNF_WLABEL;
		return (0);

	default:
		return (ENOTTY);
	}

	return (0);
}

/*
 * Duplicate the current process's credentials.  Since we are called only
 * as the result of a SET ioctl and only root can do that, any future access
 * to this "disk" is essentially as root.  Note that credentials may change
 * if some other uid can write directly to the mapped file (NFS).
 */
int
vndsetcred(struct vnd_softc *vnd, struct ucred *cred)
{
	struct uio auio;
	struct iovec aiov;
	char *tmpbuf;
	int error;
	struct proc *p = curproc;

	vnd->sc_cred = crdup(cred);
	tmpbuf = malloc(DEV_BSIZE, M_TEMP, M_WAITOK);

	/* XXX: Horrible kludge to establish credentials for NFS */
	aiov.iov_base = tmpbuf;
	aiov.iov_len = MIN(DEV_BSIZE, vnd->sc_size * vnd->sc_secsize);
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = 0;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_resid = aiov.iov_len;
	vn_lock(vnd->sc_vp, LK_RETRY | LK_EXCLUSIVE, p);
	error = VOP_READ(vnd->sc_vp, &auio, 0, vnd->sc_cred);
	VOP_UNLOCK(vnd->sc_vp, 0, p);

	free(tmpbuf, M_TEMP);
	return (error);
}

void
vndshutdown(void)
{
	struct vnd_softc *vnd;

	for (vnd = &vnd_softc[0]; vnd < &vnd_softc[numvnd]; vnd++)
		if (vnd->sc_flags & VNF_INITED)
			vndclear(vnd);
}

void
vndclear(struct vnd_softc *vnd)
{
	struct vnode *vp = vnd->sc_vp;
	struct proc *p = curproc;		/* XXX */

	DNPRINTF(VDB_FOLLOW, "vndclear(%p): vp %p\n", vnd, vp);

	vnd->sc_flags &= ~VNF_INITED;
	if (vp == NULL)
		panic("vndioctl: null vp");
	(void) vn_close(vp, VNDRW(vnd), vnd->sc_cred, p);
	crfree(vnd->sc_cred);
	vnd->sc_vp = NULL;
	vnd->sc_cred = NULL;
	vnd->sc_size = 0;
}

daddr64_t
vndsize(dev_t dev)
{
	int unit = vndunit(dev);
	struct vnd_softc *vnd = &vnd_softc[unit];

	if (unit >= numvnd || (vnd->sc_flags & VNF_INITED) == 0)
		return (-1);
	return (vnd->sc_size * (vnd->sc_secsize / DEV_BSIZE));
}

int
vnddump(dev_t dev, daddr64_t blkno, caddr_t va, size_t size)
{

	/* Not implemented. */
	return (ENXIO);
}
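
/*
 * Minimal userland sketch of driving VNDIOCSET directly (illustrative only;
 * vnconfig(8) is the normal consumer).  The struct vnd_ioctl fields are the
 * ones referenced in vndioctl() above; the device path and geometry values
 * are assumptions made up for the example.
 *
 *	#include <sys/types.h>
 *	#include <sys/ioctl.h>
 *	#include <dev/vndioctl.h>
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <err.h>
 *
 *	struct vnd_ioctl vio;
 *	int fd;
 *
 *	memset(&vio, 0, sizeof(vio));
 *	vio.vnd_file = "/var/tmp/diskimage";	(backing file)
 *	vio.vnd_secsize = 512;			(geometry used for the label)
 *	vio.vnd_nsectors = 32;
 *	vio.vnd_ntracks = 64;
 *	vio.vnd_keylen = 0;			(no blf encryption)
 *
 *	if ((fd = open("/dev/rsvnd0c", O_RDWR)) == -1 ||
 *	    ioctl(fd, VNDIOCSET, &vio) == -1)
 *		err(1, "VNDIOCSET");
 *	(on success vio.vnd_size holds the size of the new disk in bytes)
 */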