1 /* $OpenBSD: vnd.c,v 1.90 2008/09/03 23:24:25 krw Exp $ */ 2 /* $NetBSD: vnd.c,v 1.26 1996/03/30 23:06:11 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1988 University of Utah. 6 * Copyright (c) 1990, 1993 7 * The Regents of the University of California. All rights reserved. 8 * 9 * This code is derived from software contributed to Berkeley by 10 * the Systems Programming Group of the University of Utah Computer 11 * Science Department. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * 37 * from: Utah $Hdr: vn.c 1.13 94/04/02$ 38 * 39 * @(#)vn.c 8.6 (Berkeley) 4/1/94 40 */ 41 42 /* 43 * Vnode disk driver. 44 * 45 * Block/character interface to a vnode. Allows one to treat a file 46 * as a disk (e.g. build a filesystem in it, mount it, etc.). 47 * 48 * NOTE 1: This uses either the VOP_BMAP/VOP_STRATEGY interface to the 49 * vnode or simple VOP_READ/VOP_WRITE. The former is suitable for swapping 50 * as it doesn't distort the local buffer cache. The latter is good for 51 * building disk images as it keeps the cache consistent after the block 52 * device is closed. 53 * 54 * NOTE 2: There is a security issue involved with this driver. 55 * Once mounted all access to the contents of the "mapped" file via 56 * the special file is controlled by the permissions on the special 57 * file, the protection of the mapped file is ignored (effectively, 58 * by using root credentials in all transactions). 59 * 60 * NOTE 3: Doesn't interact with leases, should it? 61 */ 62 63 #include <sys/param.h> 64 #include <sys/systm.h> 65 #include <sys/namei.h> 66 #include <sys/proc.h> 67 #include <sys/errno.h> 68 #include <sys/buf.h> 69 #include <sys/malloc.h> 70 #include <sys/pool.h> 71 #include <sys/ioctl.h> 72 #include <sys/disklabel.h> 73 #include <sys/device.h> 74 #include <sys/disk.h> 75 #include <sys/stat.h> 76 #include <sys/mount.h> 77 #include <sys/vnode.h> 78 #include <sys/file.h> 79 #include <sys/rwlock.h> 80 #include <sys/uio.h> 81 #include <sys/conf.h> 82 83 #include <crypto/blf.h> 84 85 #include <miscfs/specfs/specdev.h> 86 87 #include <dev/vndioctl.h> 88 89 #ifdef VNDDEBUG 90 int dovndcluster = 1; 91 int vnddebug = 0x00; 92 #define VDB_FOLLOW 0x01 93 #define VDB_INIT 0x02 94 #define VDB_IO 0x04 95 #define DNPRINTF(f, p...) do { if ((f) & vnddebug) printf(p); } while (0) 96 #else 97 #define DNPRINTF(f, p...) /* nothing */ 98 #endif /* VNDDEBUG */ 99 100 /* 101 * vndunit is a bit weird. have to reconstitute the dev_t for 102 * DISKUNIT(), but with the minor masked off. 103 */ 104 #define vndunit(x) DISKUNIT(makedev(major(x), minor(x) & 0x7ff)) 105 #define vndsimple(x) (minor(x) & 0x800) 106 107 /* same as MAKEDISKDEV, preserving the vndsimple() property */ 108 #define VNDLABELDEV(dev) \ 109 makedev(major(dev), DISKMINOR(vndunit(dev), RAW_PART) | \ 110 (vndsimple(dev) ? 0x800 : 0)) 111 112 struct vndbuf { 113 struct buf vb_buf; 114 struct buf *vb_obp; 115 }; 116 117 /* 118 * struct vndbuf allocator 119 */ 120 struct pool vndbufpl; 121 122 #define getvndbuf() pool_get(&vndbufpl, PR_WAITOK) 123 #define putvndbuf(vbp) pool_put(&vndbufpl, vbp); 124 125 struct vnd_softc { 126 struct device sc_dev; 127 struct disk sc_dk; 128 129 char sc_file[VNDNLEN]; /* file we're covering */ 130 int sc_flags; /* flags */ 131 size_t sc_size; /* size of vnd in sectors */ 132 size_t sc_secsize; /* sector size in bytes */ 133 size_t sc_nsectors; /* # of sectors per track */ 134 size_t sc_ntracks; /* # of tracks per cylinder */ 135 struct vnode *sc_vp; /* vnode */ 136 struct ucred *sc_cred; /* credentials */ 137 struct buf sc_tab; /* transfer queue */ 138 blf_ctx *sc_keyctx; /* key context */ 139 struct rwlock sc_rwlock; 140 }; 141 142 /* sc_flags */ 143 #define VNF_ALIVE 0x0001 144 #define VNF_INITED 0x0002 145 #define VNF_LABELLING 0x0100 146 #define VNF_WLABEL 0x0200 147 #define VNF_HAVELABEL 0x0400 148 #define VNF_SIMPLE 0x1000 149 #define VNF_READONLY 0x2000 150 151 #define VNDRW(v) ((v)->sc_flags & VNF_READONLY ? FREAD : FREAD|FWRITE) 152 153 struct vnd_softc *vnd_softc; 154 int numvnd = 0; 155 156 struct dkdriver vnddkdriver = { vndstrategy }; 157 158 /* called by main() at boot time */ 159 void vndattach(int); 160 161 void vndclear(struct vnd_softc *); 162 void vndstart(struct vnd_softc *); 163 int vndsetcred(struct vnd_softc *, struct ucred *); 164 void vndiodone(struct buf *); 165 void vndshutdown(void); 166 void vndgetdisklabel(dev_t, struct vnd_softc *, struct disklabel *, int); 167 void vndencrypt(struct vnd_softc *, caddr_t, size_t, daddr64_t, int); 168 size_t vndbdevsize(struct vnode *, struct proc *); 169 170 #define vndlock(sc) rw_enter(&sc->sc_rwlock, RW_WRITE|RW_INTR) 171 #define vndunlock(sc) rw_exit_write(&sc->sc_rwlock) 172 173 void 174 vndencrypt(struct vnd_softc *vnd, caddr_t addr, size_t size, daddr64_t off, 175 int encrypt) 176 { 177 int i, bsize; 178 u_char iv[8]; 179 180 bsize = dbtob(1); 181 for (i = 0; i < size/bsize; i++) { 182 bzero(iv, sizeof(iv)); 183 bcopy((u_char *)&off, iv, sizeof(off)); 184 blf_ecb_encrypt(vnd->sc_keyctx, iv, sizeof(iv)); 185 if (encrypt) 186 blf_cbc_encrypt(vnd->sc_keyctx, iv, addr, bsize); 187 else 188 blf_cbc_decrypt(vnd->sc_keyctx, iv, addr, bsize); 189 190 addr += bsize; 191 off++; 192 } 193 } 194 195 void 196 vndattach(int num) 197 { 198 char *mem; 199 u_long size; 200 int i; 201 202 if (num <= 0) 203 return; 204 size = num * sizeof(struct vnd_softc); 205 mem = malloc(size, M_DEVBUF, M_NOWAIT | M_ZERO); 206 if (mem == NULL) { 207 printf("WARNING: no memory for vnode disks\n"); 208 return; 209 } 210 vnd_softc = (struct vnd_softc *)mem; 211 for (i = 0; i < num; i++) { 212 rw_init(&vnd_softc[i].sc_rwlock, "vndlock"); 213 } 214 numvnd = num; 215 216 pool_init(&vndbufpl, sizeof(struct vndbuf), 0, 0, 0, "vndbufpl", NULL); 217 pool_setlowat(&vndbufpl, 16); 218 pool_sethiwat(&vndbufpl, 1024); 219 } 220 221 int 222 vndopen(dev_t dev, int flags, int mode, struct proc *p) 223 { 224 int unit = vndunit(dev); 225 struct vnd_softc *sc; 226 int error = 0, part, pmask; 227 228 DNPRINTF(VDB_FOLLOW, "vndopen(%x, %x, %x, %p)\n", dev, flags, mode, p); 229 230 if (unit >= numvnd) 231 return (ENXIO); 232 sc = &vnd_softc[unit]; 233 234 if ((error = vndlock(sc)) != 0) 235 return (error); 236 237 if (!vndsimple(dev) && sc->sc_vp != NULL && 238 (sc->sc_vp->v_type != VREG || sc->sc_keyctx != NULL)) { 239 error = EINVAL; 240 goto bad; 241 } 242 243 if ((flags & FWRITE) && (sc->sc_flags & VNF_READONLY)) { 244 error = EROFS; 245 goto bad; 246 } 247 248 if ((sc->sc_flags & VNF_INITED) && 249 (sc->sc_flags & VNF_HAVELABEL) == 0) { 250 sc->sc_flags |= VNF_HAVELABEL; 251 vndgetdisklabel(dev, sc, sc->sc_dk.dk_label, 0); 252 } 253 254 part = DISKPART(dev); 255 pmask = 1 << part; 256 257 /* 258 * If any partition is open, all succeeding openings must be of the 259 * same type or read-only. 260 */ 261 if (sc->sc_dk.dk_openmask) { 262 if (((sc->sc_flags & VNF_SIMPLE) != 0) != 263 (vndsimple(dev) != 0) && (flags & FWRITE)) { 264 error = EBUSY; 265 goto bad; 266 } 267 } else if (vndsimple(dev)) 268 sc->sc_flags |= VNF_SIMPLE; 269 else 270 sc->sc_flags &= ~VNF_SIMPLE; 271 272 /* Check that the partition exists. */ 273 if (part != RAW_PART && 274 ((sc->sc_flags & VNF_HAVELABEL) == 0 || 275 part >= sc->sc_dk.dk_label->d_npartitions || 276 sc->sc_dk.dk_label->d_partitions[part].p_fstype == FS_UNUSED)) { 277 error = ENXIO; 278 goto bad; 279 } 280 281 /* Prevent our unit from being unconfigured while open. */ 282 switch (mode) { 283 case S_IFCHR: 284 sc->sc_dk.dk_copenmask |= pmask; 285 break; 286 287 case S_IFBLK: 288 sc->sc_dk.dk_bopenmask |= pmask; 289 break; 290 } 291 sc->sc_dk.dk_openmask = 292 sc->sc_dk.dk_copenmask | sc->sc_dk.dk_bopenmask; 293 294 error = 0; 295 bad: 296 vndunlock(sc); 297 return (error); 298 } 299 300 /* 301 * Load the label information on the named device 302 */ 303 void 304 vndgetdisklabel(dev_t dev, struct vnd_softc *sc, struct disklabel *lp, 305 int spoofonly) 306 { 307 char *errstring = NULL; 308 309 bzero(lp, sizeof(struct disklabel)); 310 311 lp->d_secsize = sc->sc_secsize; 312 lp->d_nsectors = sc->sc_nsectors; 313 lp->d_ntracks = sc->sc_ntracks; 314 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors; 315 lp->d_ncylinders = sc->sc_size / lp->d_secpercyl; 316 317 strncpy(lp->d_typename, "vnd device", sizeof(lp->d_typename)); 318 lp->d_type = DTYPE_VND; 319 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname)); 320 DL_SETDSIZE(lp, sc->sc_size); 321 lp->d_rpm = 3600; 322 lp->d_interleave = 1; 323 lp->d_flags = 0; 324 lp->d_version = 1; 325 326 lp->d_magic = DISKMAGIC; 327 lp->d_magic2 = DISKMAGIC; 328 lp->d_checksum = dkcksum(lp); 329 330 /* Call the generic disklabel extraction routine */ 331 errstring = readdisklabel(VNDLABELDEV(dev), vndstrategy, lp, spoofonly); 332 if (errstring) { 333 DNPRINTF(VDB_IO, "%s: %s\n", sc->sc_dev.dv_xname, 334 errstring); 335 return; 336 } 337 } 338 339 int 340 vndclose(dev_t dev, int flags, int mode, struct proc *p) 341 { 342 int unit = vndunit(dev); 343 struct vnd_softc *sc; 344 int error = 0, part; 345 346 DNPRINTF(VDB_FOLLOW, "vndclose(%x, %x, %x, %p)\n", dev, flags, mode, p); 347 348 if (unit >= numvnd) 349 return (ENXIO); 350 sc = &vnd_softc[unit]; 351 352 if ((error = vndlock(sc)) != 0) 353 return (error); 354 355 part = DISKPART(dev); 356 357 /* ...that much closer to allowing unconfiguration... */ 358 switch (mode) { 359 case S_IFCHR: 360 sc->sc_dk.dk_copenmask &= ~(1 << part); 361 break; 362 363 case S_IFBLK: 364 sc->sc_dk.dk_bopenmask &= ~(1 << part); 365 break; 366 } 367 sc->sc_dk.dk_openmask = 368 sc->sc_dk.dk_copenmask | sc->sc_dk.dk_bopenmask; 369 370 vndunlock(sc); 371 return (0); 372 } 373 374 /* 375 * Two methods are used, the traditional buffercache bypassing and the 376 * newer, cache-coherent on unmount, one. 377 * 378 * Former method: 379 * Break the request into bsize pieces and submit using VOP_BMAP/VOP_STRATEGY. 380 * Note that this driver can only be used for swapping over NFS on the hp 381 * since nfs_strategy on the vax cannot handle u-areas and page tables. 382 * 383 * Latter method: 384 * Repack the buffer into an uio structure and use VOP_READ/VOP_WRITE to 385 * access the underlying file. 386 */ 387 void 388 vndstrategy(struct buf *bp) 389 { 390 int unit = vndunit(bp->b_dev); 391 struct vnd_softc *vnd = &vnd_softc[unit]; 392 struct vndbuf *nbp; 393 int bsize; 394 off_t bn; 395 caddr_t addr; 396 size_t resid; 397 int sz, flags, error, s; 398 struct iovec aiov; 399 struct uio auio; 400 struct proc *p = curproc; 401 402 DNPRINTF(VDB_FOLLOW, "vndstrategy(%p): unit %d\n", bp, unit); 403 404 if ((vnd->sc_flags & VNF_INITED) == 0) { 405 bp->b_error = ENXIO; 406 bp->b_flags |= B_ERROR; 407 s = splbio(); 408 biodone(bp); 409 splx(s); 410 return; 411 } 412 413 /* Ensure that the requested block is sector aligned. */ 414 if (bp->b_blkno % DL_BLKSPERSEC(vnd->sc_dk.dk_label) != 0) { 415 bp->b_error = EINVAL; 416 bp->b_flags |= B_ERROR; 417 s = splbio(); 418 biodone(bp); 419 splx(s); 420 return; 421 } 422 423 bn = bp->b_blkno; 424 bp->b_resid = bp->b_bcount; 425 426 if (bn < 0) { 427 bp->b_error = EINVAL; 428 bp->b_flags |= B_ERROR; 429 s = splbio(); 430 biodone(bp); 431 splx(s); 432 return; 433 } 434 435 /* If we have a label, do a boundary check. */ 436 if (vnd->sc_flags & VNF_HAVELABEL) { 437 if (bounds_check_with_label(bp, vnd->sc_dk.dk_label, 1) <= 0) { 438 s = splbio(); 439 biodone(bp); 440 splx(s); 441 return; 442 } 443 444 /* 445 * bounds_check_with_label() changes bp->b_resid, reset it 446 */ 447 bp->b_resid = bp->b_bcount; 448 } 449 450 if (vnd->sc_flags & VNF_HAVELABEL) 451 sz = howmany(bp->b_bcount, vnd->sc_dk.dk_label->d_secsize); 452 else 453 sz = howmany(bp->b_bcount, DEV_BSIZE); 454 455 /* No bypassing of buffer cache? */ 456 if (vndsimple(bp->b_dev)) { 457 /* Loop until all queued requests are handled. */ 458 for (;;) { 459 int part = DISKPART(bp->b_dev); 460 daddr64_t off = DL_SECTOBLK(vnd->sc_dk.dk_label, 461 DL_GETPOFFSET(&vnd->sc_dk.dk_label->d_partitions[part])); 462 aiov.iov_base = bp->b_data; 463 auio.uio_resid = aiov.iov_len = bp->b_bcount; 464 auio.uio_iov = &aiov; 465 auio.uio_iovcnt = 1; 466 auio.uio_offset = dbtob((off_t)(bp->b_blkno + off)); 467 auio.uio_segflg = UIO_SYSSPACE; 468 auio.uio_procp = p; 469 470 vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY, p); 471 if (bp->b_flags & B_READ) { 472 auio.uio_rw = UIO_READ; 473 bp->b_error = VOP_READ(vnd->sc_vp, &auio, 0, 474 vnd->sc_cred); 475 if (vnd->sc_keyctx) 476 vndencrypt(vnd, bp->b_data, 477 bp->b_bcount, bp->b_blkno, 0); 478 } else { 479 if (vnd->sc_keyctx) 480 vndencrypt(vnd, bp->b_data, 481 bp->b_bcount, bp->b_blkno, 1); 482 auio.uio_rw = UIO_WRITE; 483 /* 484 * Upper layer has already checked I/O for 485 * limits, so there is no need to do it again. 486 */ 487 bp->b_error = VOP_WRITE(vnd->sc_vp, &auio, 488 IO_NOLIMIT, vnd->sc_cred); 489 /* Data in buffer cache needs to be in clear */ 490 if (vnd->sc_keyctx) 491 vndencrypt(vnd, bp->b_data, 492 bp->b_bcount, bp->b_blkno, 0); 493 } 494 VOP_UNLOCK(vnd->sc_vp, 0, p); 495 if (bp->b_error) 496 bp->b_flags |= B_ERROR; 497 bp->b_resid = auio.uio_resid; 498 s = splbio(); 499 biodone(bp); 500 splx(s); 501 502 /* If nothing more is queued, we are done. */ 503 if (!vnd->sc_tab.b_active) 504 return; 505 506 /* 507 * Dequeue now since lower level strategy 508 * routine might queue using same links. 509 */ 510 s = splbio(); 511 bp = vnd->sc_tab.b_actf; 512 vnd->sc_tab.b_actf = bp->b_actf; 513 vnd->sc_tab.b_active--; 514 splx(s); 515 } 516 } 517 518 if (vnd->sc_vp->v_type != VREG || vnd->sc_keyctx != NULL) { 519 bp->b_error = EINVAL; 520 bp->b_flags |= B_ERROR; 521 s = splbio(); 522 biodone(bp); 523 splx(s); 524 return; 525 } 526 527 /* The old-style buffercache bypassing method. */ 528 bn += DL_SECTOBLK(vnd->sc_dk.dk_label, 529 DL_GETPOFFSET(&vnd->sc_dk.dk_label->d_partitions[DISKPART(bp->b_dev)])); 530 bn = dbtob(bn); 531 bsize = vnd->sc_vp->v_mount->mnt_stat.f_iosize; 532 addr = bp->b_data; 533 flags = bp->b_flags | B_CALL; 534 for (resid = bp->b_resid; resid; resid -= sz) { 535 struct vnode *vp; 536 daddr64_t nbn; 537 int off, nra; 538 539 nra = 0; 540 vn_lock(vnd->sc_vp, LK_RETRY | LK_EXCLUSIVE, p); 541 error = VOP_BMAP(vnd->sc_vp, bn / bsize, &vp, &nbn, &nra); 542 VOP_UNLOCK(vnd->sc_vp, 0, p); 543 if (error == 0 && (long)nbn == -1) 544 error = EIO; 545 #ifdef VNDDEBUG 546 if (!dovndcluster) 547 nra = 0; 548 #endif 549 550 if ((off = bn % bsize) != 0) 551 sz = bsize - off; 552 else 553 sz = (1 + nra) * bsize; 554 if (resid < sz) 555 sz = resid; 556 557 DNPRINTF(VDB_IO, "vndstrategy: vp %p/%p bn %x/%x sz %x\n", 558 vnd->sc_vp, vp, bn, nbn, sz); 559 560 s = splbio(); 561 nbp = getvndbuf(); 562 splx(s); 563 nbp->vb_buf.b_flags = flags; 564 nbp->vb_buf.b_bcount = sz; 565 nbp->vb_buf.b_bufsize = bp->b_bufsize; 566 nbp->vb_buf.b_error = 0; 567 if (vp->v_type == VBLK || vp->v_type == VCHR) 568 nbp->vb_buf.b_dev = vp->v_rdev; 569 else 570 nbp->vb_buf.b_dev = NODEV; 571 nbp->vb_buf.b_data = addr; 572 nbp->vb_buf.b_blkno = nbn + btodb(off); 573 nbp->vb_buf.b_proc = bp->b_proc; 574 nbp->vb_buf.b_iodone = vndiodone; 575 nbp->vb_buf.b_vp = vp; 576 nbp->vb_buf.b_dirtyoff = bp->b_dirtyoff; 577 nbp->vb_buf.b_dirtyend = bp->b_dirtyend; 578 nbp->vb_buf.b_validoff = bp->b_validoff; 579 nbp->vb_buf.b_validend = bp->b_validend; 580 LIST_INIT(&nbp->vb_buf.b_dep); 581 582 /* save a reference to the old buffer */ 583 nbp->vb_obp = bp; 584 585 /* 586 * If there was an error or a hole in the file...punt. 587 * Note that we deal with this after the nbp allocation. 588 * This ensures that we properly clean up any operations 589 * that we have already fired off. 590 * 591 * XXX we could deal with holes here but it would be 592 * a hassle (in the write case). 593 * We must still however charge for the write even if there 594 * was an error. 595 */ 596 if (error) { 597 nbp->vb_buf.b_error = error; 598 nbp->vb_buf.b_flags |= B_ERROR; 599 bp->b_resid -= (resid - sz); 600 s = splbio(); 601 /* charge for the write */ 602 if ((nbp->vb_buf.b_flags & B_READ) == 0) 603 nbp->vb_buf.b_vp->v_numoutput++; 604 biodone(&nbp->vb_buf); 605 splx(s); 606 return; 607 } 608 /* 609 * Just sort by block number 610 */ 611 nbp->vb_buf.b_cylinder = nbp->vb_buf.b_blkno; 612 s = splbio(); 613 disksort(&vnd->sc_tab, &nbp->vb_buf); 614 vnd->sc_tab.b_active++; 615 vndstart(vnd); 616 splx(s); 617 bn += sz; 618 addr += sz; 619 } 620 } 621 622 /* 623 * Feed requests sequentially. 624 * We do it this way to keep from flooding NFS servers if we are connected 625 * to an NFS file. This places the burden on the client rather than the 626 * server. 627 */ 628 void 629 vndstart(struct vnd_softc *vnd) 630 { 631 struct buf *bp; 632 633 /* 634 * Dequeue now since lower level strategy routine might 635 * queue using same links 636 */ 637 bp = vnd->sc_tab.b_actf; 638 vnd->sc_tab.b_actf = bp->b_actf; 639 640 DNPRINTF(VDB_IO, 641 "vndstart(%d): bp %p vp %p blkno %x addr %p cnt %lx\n", 642 vnd-vnd_softc, bp, bp->b_vp, bp->b_blkno, bp->b_data, 643 bp->b_bcount); 644 645 /* Instrumentation. */ 646 disk_busy(&vnd->sc_dk); 647 648 if ((bp->b_flags & B_READ) == 0) 649 bp->b_vp->v_numoutput++; 650 VOP_STRATEGY(bp); 651 } 652 653 void 654 vndiodone(struct buf *bp) 655 { 656 struct vndbuf *vbp = (struct vndbuf *) bp; 657 struct buf *pbp = vbp->vb_obp; 658 struct vnd_softc *vnd = &vnd_softc[vndunit(pbp->b_dev)]; 659 660 splassert(IPL_BIO); 661 662 DNPRINTF(VDB_IO, 663 "vndiodone(%d): vbp %p vp %p blkno %x addr %p cnt %lx\n", 664 vnd-vnd_softc, vbp, vbp->vb_buf.b_vp, vbp->vb_buf.b_blkno, 665 vbp->vb_buf.b_data, vbp->vb_buf.b_bcount); 666 667 if (vbp->vb_buf.b_error) { 668 DNPRINTF(VDB_IO, "vndiodone: vbp %p error %d\n", vbp, 669 vbp->vb_buf.b_error); 670 671 pbp->b_flags |= B_ERROR; 672 /* XXX does this matter here? */ 673 (&vbp->vb_buf)->b_flags |= B_RAW; 674 pbp->b_error = biowait(&vbp->vb_buf); 675 } 676 pbp->b_resid -= vbp->vb_buf.b_bcount; 677 putvndbuf(vbp); 678 if (vnd->sc_tab.b_active) { 679 disk_unbusy(&vnd->sc_dk, (pbp->b_bcount - pbp->b_resid), 680 (pbp->b_flags & B_READ)); 681 if (!vnd->sc_tab.b_actf) 682 vnd->sc_tab.b_active--; 683 } 684 if (pbp->b_resid == 0) { 685 DNPRINTF(VDB_IO, "vndiodone: pbp %p iodone\n", pbp); 686 biodone(pbp); 687 } 688 689 } 690 691 /* ARGSUSED */ 692 int 693 vndread(dev_t dev, struct uio *uio, int flags) 694 { 695 int unit = vndunit(dev); 696 struct vnd_softc *sc; 697 698 DNPRINTF(VDB_FOLLOW, "vndread(%x, %p)\n", dev, uio); 699 700 if (unit >= numvnd) 701 return (ENXIO); 702 sc = &vnd_softc[unit]; 703 704 if ((sc->sc_flags & VNF_INITED) == 0) 705 return (ENXIO); 706 707 return (physio(vndstrategy, NULL, dev, B_READ, minphys, uio)); 708 } 709 710 /* ARGSUSED */ 711 int 712 vndwrite(dev_t dev, struct uio *uio, int flags) 713 { 714 int unit = vndunit(dev); 715 struct vnd_softc *sc; 716 717 DNPRINTF(VDB_FOLLOW, "vndwrite(%x, %p)\n", dev, uio); 718 719 if (unit >= numvnd) 720 return (ENXIO); 721 sc = &vnd_softc[unit]; 722 723 if ((sc->sc_flags & VNF_INITED) == 0) 724 return (ENXIO); 725 726 return (physio(vndstrategy, NULL, dev, B_WRITE, minphys, uio)); 727 } 728 729 size_t 730 vndbdevsize(struct vnode *vp, struct proc *p) 731 { 732 struct partinfo pi; 733 struct bdevsw *bsw; 734 dev_t dev; 735 736 dev = vp->v_rdev; 737 bsw = bdevsw_lookup(dev); 738 if (bsw->d_ioctl == NULL) 739 return (0); 740 if (bsw->d_ioctl(dev, DIOCGPART, (caddr_t)&pi, FREAD, p)) 741 return (0); 742 DNPRINTF(VDB_INIT, "vndbdevsize: size %li secsize %li\n", 743 (long)pi.part->p_size,(long)pi.disklab->d_secsize); 744 return (pi.part->p_size); 745 } 746 747 /* ARGSUSED */ 748 int 749 vndioctl(dev_t dev, u_long cmd, caddr_t addr, int flag, struct proc *p) 750 { 751 int unit = vndunit(dev); 752 struct vnd_softc *vnd; 753 struct vnd_ioctl *vio; 754 struct vnd_user *vnu; 755 struct vattr vattr; 756 struct nameidata nd; 757 int error, part, pmask, s; 758 759 DNPRINTF(VDB_FOLLOW, "vndioctl(%x, %lx, %p, %x, %p): unit %d\n", 760 dev, cmd, addr, flag, p, unit); 761 762 error = suser(p, 0); 763 if (error) 764 return (error); 765 if (unit >= numvnd) 766 return (ENXIO); 767 768 vnd = &vnd_softc[unit]; 769 vio = (struct vnd_ioctl *)addr; 770 switch (cmd) { 771 772 case VNDIOCSET: 773 if (vnd->sc_flags & VNF_INITED) 774 return (EBUSY); 775 if (!(vnd->sc_flags & VNF_SIMPLE) && vio->vnd_keylen) 776 return (EINVAL); 777 778 if ((error = vndlock(vnd)) != 0) 779 return (error); 780 781 if ((error = copyinstr(vio->vnd_file, vnd->sc_file, 782 sizeof(vnd->sc_file), NULL))) { 783 vndunlock(vnd); 784 return (error); 785 } 786 787 bzero(vnd->sc_dev.dv_xname, sizeof(vnd->sc_dev.dv_xname)); 788 if (snprintf(vnd->sc_dev.dv_xname, sizeof(vnd->sc_dev.dv_xname), 789 "vnd%d", unit) >= sizeof(vnd->sc_dev.dv_xname)) { 790 printf("VNDIOCSET: device name too long\n"); 791 vndunlock(vnd); 792 return(ENXIO); 793 } 794 795 /* Set geometry for device. */ 796 vnd->sc_secsize = vio->vnd_secsize; 797 vnd->sc_ntracks = vio->vnd_ntracks; 798 vnd->sc_nsectors = vio->vnd_nsectors; 799 800 /* 801 * Open for read and write first. This lets vn_open() weed out 802 * directories, sockets, etc. so we don't have to worry about 803 * them. 804 */ 805 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, vio->vnd_file, p); 806 vnd->sc_flags &= ~VNF_READONLY; 807 error = vn_open(&nd, FREAD|FWRITE, 0); 808 if (error == EROFS) { 809 vnd->sc_flags |= VNF_READONLY; 810 error = vn_open(&nd, FREAD, 0); 811 } 812 if (error) { 813 vndunlock(vnd); 814 return (error); 815 } 816 817 if (nd.ni_vp->v_type != VREG && !vndsimple(dev)) { 818 VOP_UNLOCK(nd.ni_vp, 0, p); 819 vn_close(nd.ni_vp, VNDRW(vnd), p->p_ucred, p); 820 vndunlock(vnd); 821 return (EINVAL); 822 } 823 824 if (nd.ni_vp->v_type == VBLK) 825 vnd->sc_size = vndbdevsize(nd.ni_vp, p); 826 else { 827 error = VOP_GETATTR(nd.ni_vp, &vattr, p->p_ucred, p); 828 if (error) { 829 VOP_UNLOCK(nd.ni_vp, 0, p); 830 vn_close(nd.ni_vp, VNDRW(vnd), p->p_ucred, p); 831 vndunlock(vnd); 832 return (error); 833 } 834 vnd->sc_size = vattr.va_size / vnd->sc_secsize; 835 } 836 VOP_UNLOCK(nd.ni_vp, 0, p); 837 vnd->sc_vp = nd.ni_vp; 838 if ((error = vndsetcred(vnd, p->p_ucred)) != 0) { 839 (void) vn_close(nd.ni_vp, VNDRW(vnd), p->p_ucred, p); 840 vndunlock(vnd); 841 return (error); 842 } 843 844 if (vio->vnd_keylen > 0) { 845 char key[BLF_MAXUTILIZED]; 846 847 if (vio->vnd_keylen > sizeof(key)) 848 vio->vnd_keylen = sizeof(key); 849 850 if ((error = copyin(vio->vnd_key, key, 851 vio->vnd_keylen)) != 0) { 852 (void) vn_close(nd.ni_vp, VNDRW(vnd), 853 p->p_ucred, p); 854 vndunlock(vnd); 855 return (error); 856 } 857 858 vnd->sc_keyctx = malloc(sizeof(*vnd->sc_keyctx), M_DEVBUF, 859 M_WAITOK); 860 blf_key(vnd->sc_keyctx, key, vio->vnd_keylen); 861 bzero(key, vio->vnd_keylen); 862 } else 863 vnd->sc_keyctx = NULL; 864 865 vio->vnd_size = vnd->sc_size * vnd->sc_secsize; 866 vnd->sc_flags |= VNF_INITED; 867 868 DNPRINTF(VDB_INIT, "vndioctl: SET vp %p size %llx\n", 869 vnd->sc_vp, (unsigned long long)vnd->sc_size); 870 871 /* Attach the disk. */ 872 vnd->sc_dk.dk_driver = &vnddkdriver; 873 vnd->sc_dk.dk_name = vnd->sc_dev.dv_xname; 874 disk_attach(&vnd->sc_dk); 875 876 vndunlock(vnd); 877 878 break; 879 880 case VNDIOCCLR: 881 if ((vnd->sc_flags & VNF_INITED) == 0) 882 return (ENXIO); 883 884 if ((error = vndlock(vnd)) != 0) 885 return (error); 886 887 /* 888 * Don't unconfigure if any other partitions are open 889 * or if both the character and block flavors of this 890 * partition are open. 891 */ 892 part = DISKPART(dev); 893 pmask = (1 << part); 894 if ((vnd->sc_dk.dk_openmask & ~pmask) || 895 ((vnd->sc_dk.dk_bopenmask & pmask) && 896 (vnd->sc_dk.dk_copenmask & pmask))) { 897 vndunlock(vnd); 898 return (EBUSY); 899 } 900 901 vndclear(vnd); 902 DNPRINTF(VDB_INIT, "vndioctl: CLRed\n"); 903 904 /* Free crypto key */ 905 if (vnd->sc_keyctx) { 906 bzero(vnd->sc_keyctx, sizeof(*vnd->sc_keyctx)); 907 free(vnd->sc_keyctx, M_DEVBUF); 908 } 909 910 /* Detach the disk. */ 911 disk_detach(&vnd->sc_dk); 912 913 /* This must be atomic. */ 914 s = splhigh(); 915 vndunlock(vnd); 916 bzero(vnd, sizeof(struct vnd_softc)); 917 splx(s); 918 break; 919 920 case VNDIOCGET: 921 vnu = (struct vnd_user *)addr; 922 923 if (vnu->vnu_unit == -1) 924 vnu->vnu_unit = unit; 925 if (vnu->vnu_unit >= numvnd) 926 return (ENXIO); 927 if (vnu->vnu_unit < 0) 928 return (EINVAL); 929 930 vnd = &vnd_softc[vnu->vnu_unit]; 931 932 if (vnd->sc_flags & VNF_INITED) { 933 error = VOP_GETATTR(vnd->sc_vp, &vattr, p->p_ucred, p); 934 if (error) 935 return (error); 936 937 strlcpy(vnu->vnu_file, vnd->sc_file, 938 sizeof(vnu->vnu_file)); 939 vnu->vnu_dev = vattr.va_fsid; 940 vnu->vnu_ino = vattr.va_fileid; 941 } else { 942 vnu->vnu_dev = 0; 943 vnu->vnu_ino = 0; 944 } 945 946 break; 947 948 case DIOCGPDINFO: 949 if ((vnd->sc_flags & VNF_HAVELABEL) == 0) 950 return (ENOTTY); 951 vndgetdisklabel(dev, vnd, (struct disklabel *)addr, 1); 952 return (0); 953 954 case DIOCGDINFO: 955 if ((vnd->sc_flags & VNF_HAVELABEL) == 0) 956 return (ENOTTY); 957 *(struct disklabel *)addr = *(vnd->sc_dk.dk_label); 958 return (0); 959 960 case DIOCGPART: 961 if ((vnd->sc_flags & VNF_HAVELABEL) == 0) 962 return (ENOTTY); 963 ((struct partinfo *)addr)->disklab = vnd->sc_dk.dk_label; 964 ((struct partinfo *)addr)->part = 965 &vnd->sc_dk.dk_label->d_partitions[DISKPART(dev)]; 966 return (0); 967 968 case DIOCWDINFO: 969 case DIOCSDINFO: 970 if ((vnd->sc_flags & VNF_HAVELABEL) == 0) 971 return (ENOTTY); 972 if ((flag & FWRITE) == 0) 973 return (EBADF); 974 975 if ((error = vndlock(vnd)) != 0) 976 return (error); 977 vnd->sc_flags |= VNF_LABELLING; 978 979 error = setdisklabel(vnd->sc_dk.dk_label, 980 (struct disklabel *)addr, /*vnd->sc_dk.dk_openmask : */0); 981 if (error == 0) { 982 if (cmd == DIOCWDINFO) 983 error = writedisklabel(VNDLABELDEV(dev), 984 vndstrategy, vnd->sc_dk.dk_label); 985 } 986 987 vnd->sc_flags &= ~VNF_LABELLING; 988 vndunlock(vnd); 989 return (error); 990 991 case DIOCWLABEL: 992 if ((flag & FWRITE) == 0) 993 return (EBADF); 994 if (*(int *)addr) 995 vnd->sc_flags |= VNF_WLABEL; 996 else 997 vnd->sc_flags &= ~VNF_WLABEL; 998 return (0); 999 1000 default: 1001 return (ENOTTY); 1002 } 1003 1004 return (0); 1005 } 1006 1007 /* 1008 * Duplicate the current processes' credentials. Since we are called only 1009 * as the result of a SET ioctl and only root can do that, any future access 1010 * to this "disk" is essentially as root. Note that credentials may change 1011 * if some other uid can write directly to the mapped file (NFS). 1012 */ 1013 int 1014 vndsetcred(struct vnd_softc *vnd, struct ucred *cred) 1015 { 1016 struct uio auio; 1017 struct iovec aiov; 1018 char *tmpbuf; 1019 int error; 1020 struct proc *p = curproc; 1021 1022 vnd->sc_cred = crdup(cred); 1023 tmpbuf = malloc(DEV_BSIZE, M_TEMP, M_WAITOK); 1024 1025 /* XXX: Horrible kludge to establish credentials for NFS */ 1026 aiov.iov_base = tmpbuf; 1027 aiov.iov_len = MIN(DEV_BSIZE, vnd->sc_size * vnd->sc_secsize); 1028 auio.uio_iov = &aiov; 1029 auio.uio_iovcnt = 1; 1030 auio.uio_offset = 0; 1031 auio.uio_rw = UIO_READ; 1032 auio.uio_segflg = UIO_SYSSPACE; 1033 auio.uio_resid = aiov.iov_len; 1034 vn_lock(vnd->sc_vp, LK_RETRY | LK_EXCLUSIVE, p); 1035 error = VOP_READ(vnd->sc_vp, &auio, 0, vnd->sc_cred); 1036 VOP_UNLOCK(vnd->sc_vp, 0, p); 1037 1038 free(tmpbuf, M_TEMP); 1039 return (error); 1040 } 1041 1042 void 1043 vndshutdown(void) 1044 { 1045 struct vnd_softc *vnd; 1046 1047 for (vnd = &vnd_softc[0]; vnd < &vnd_softc[numvnd]; vnd++) 1048 if (vnd->sc_flags & VNF_INITED) 1049 vndclear(vnd); 1050 } 1051 1052 void 1053 vndclear(struct vnd_softc *vnd) 1054 { 1055 struct vnode *vp = vnd->sc_vp; 1056 struct proc *p = curproc; /* XXX */ 1057 1058 DNPRINTF(VDB_FOLLOW, "vndclear(%p): vp %p\n", vnd, vp); 1059 1060 vnd->sc_flags &= ~VNF_INITED; 1061 if (vp == NULL) 1062 panic("vndioctl: null vp"); 1063 (void) vn_close(vp, VNDRW(vnd), vnd->sc_cred, p); 1064 crfree(vnd->sc_cred); 1065 vnd->sc_vp = NULL; 1066 vnd->sc_cred = NULL; 1067 vnd->sc_size = 0; 1068 } 1069 1070 daddr64_t 1071 vndsize(dev_t dev) 1072 { 1073 int unit = vndunit(dev); 1074 struct vnd_softc *vnd = &vnd_softc[unit]; 1075 1076 if (unit >= numvnd || (vnd->sc_flags & VNF_INITED) == 0) 1077 return (-1); 1078 return (vnd->sc_size * (vnd->sc_secsize / DEV_BSIZE)); 1079 } 1080 1081 int 1082 vnddump(dev_t dev, daddr64_t blkno, caddr_t va, size_t size) 1083 { 1084 1085 /* Not implemented. */ 1086 return (ENXIO); 1087 } 1088