1 /* $NetBSD: vnd.c,v 1.18 1995/02/27 19:31:00 cgd Exp $ */ 2 3 /* 4 * Copyright (c) 1988 University of Utah. 5 * Copyright (c) 1990, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * This code is derived from software contributed to Berkeley by 9 * the Systems Programming Group of the University of Utah Computer 10 * Science Department. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. All advertising materials mentioning features or use of this software 21 * must display the following acknowledgement: 22 * This product includes software developed by the University of 23 * California, Berkeley and its contributors. 24 * 4. Neither the name of the University nor the names of its contributors 25 * may be used to endorse or promote products derived from this software 26 * without specific prior written permission. 27 * 28 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 31 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 38 * SUCH DAMAGE. 39 * 40 * from: Utah $Hdr: vn.c 1.13 94/04/02$ 41 * 42 * @(#)vn.c 8.6 (Berkeley) 4/1/94 43 */ 44 45 /* 46 * Vnode disk driver. 47 * 48 * Block/character interface to a vnode. Allows one to treat a file 49 * as a disk (e.g. build a filesystem in it, mount it, etc.). 50 * 51 * NOTE 1: This uses the VOP_BMAP/VOP_STRATEGY interface to the vnode 52 * instead of a simple VOP_RDWR. We do this to avoid distorting the 53 * local buffer cache. 54 * 55 * NOTE 2: There is a security issue involved with this driver. 56 * Once mounted all access to the contents of the "mapped" file via 57 * the special file is controlled by the permissions on the special 58 * file, the protection of the mapped file is ignored (effectively, 59 * by using root credentials in all transactions). 60 * 61 * NOTE 3: Doesn't interact with leases, should it? 62 */ 63 #include "vnd.h" 64 #if NVND > 0 65 66 #include <sys/param.h> 67 #include <sys/systm.h> 68 #include <sys/namei.h> 69 #include <sys/proc.h> 70 #include <sys/errno.h> 71 #include <sys/dkstat.h> 72 #include <sys/buf.h> 73 #include <sys/malloc.h> 74 #include <sys/ioctl.h> 75 #include <sys/disklabel.h> 76 #include <sys/mount.h> 77 #include <sys/vnode.h> 78 #include <sys/file.h> 79 #include <sys/uio.h> 80 81 #include <miscfs/specfs/specdev.h> 82 83 #include <dev/vndioctl.h> 84 85 #ifdef DEBUG 86 int dovndcluster = 1; 87 int vnddebug = 0x00; 88 #define VDB_FOLLOW 0x01 89 #define VDB_INIT 0x02 90 #define VDB_IO 0x04 91 #endif 92 93 #define b_cylin b_resid 94 95 #define vndunit(x) DISKUNIT(x) 96 97 struct vndbuf { 98 struct buf vb_buf; 99 struct buf *vb_obp; 100 }; 101 102 #define getvndbuf() \ 103 ((struct vndbuf *)malloc(sizeof(struct vndbuf), M_DEVBUF, M_WAITOK)) 104 #define putvndbuf(vbp) \ 105 free((caddr_t)(vbp), M_DEVBUF) 106 107 struct vnd_softc { 108 int sc_flags; /* flags */ 109 size_t sc_size; /* size of vnd */ 110 struct vnode *sc_vp; /* vnode */ 111 struct ucred *sc_cred; /* credentials */ 112 int sc_maxactive; /* max # of active requests */ 113 struct buf sc_tab; /* transfer queue */ 114 }; 115 116 /* sc_flags */ 117 #define VNF_ALIVE 0x01 118 #define VNF_INITED 0x02 119 120 #if 0 /* if you need static allocation */ 121 struct vnd_softc vn_softc[NVND]; 122 int numvnd = NVND; 123 #else 124 struct vnd_softc *vnd_softc; 125 int numvnd; 126 #endif 127 128 void vndclear __P((struct vnd_softc *)); 129 void vndstart __P((struct vnd_softc *)); 130 int vndsetcred __P((struct vnd_softc *, struct ucred *)); 131 void vndthrottle __P((struct vnd_softc *, struct vnode *)); 132 133 void 134 vndattach(num) 135 int num; 136 { 137 char *mem; 138 register u_long size; 139 140 if (num <= 0) 141 return; 142 size = num * sizeof(struct vnd_softc); 143 mem = malloc(size, M_DEVBUF, M_NOWAIT); 144 if (mem == NULL) { 145 printf("WARNING: no memory for vnode disks\n"); 146 return; 147 } 148 bzero(mem, size); 149 vnd_softc = (struct vnd_softc *)mem; 150 numvnd = num; 151 } 152 153 int 154 vndopen(dev, flags, mode, p) 155 dev_t dev; 156 int flags, mode; 157 struct proc *p; 158 { 159 int unit = vndunit(dev); 160 161 #ifdef DEBUG 162 if (vnddebug & VDB_FOLLOW) 163 printf("vndopen(%x, %x, %x, %x)\n", dev, flags, mode, p); 164 #endif 165 if (unit >= numvnd) 166 return(ENXIO); 167 return(0); 168 } 169 170 int 171 vndclose(dev, flags, mode, p) 172 dev_t dev; 173 int flags, mode; 174 struct proc *p; 175 { 176 #ifdef DEBUG 177 if (vnddebug & VDB_FOLLOW) 178 printf("vndclose(%x, %x, %x, %x)\n", dev, flags, mode, p); 179 #endif 180 return 0; 181 } 182 183 /* 184 * Break the request into bsize pieces and submit using VOP_BMAP/VOP_STRATEGY. 185 * Note that this driver can only be used for swapping over NFS on the hp 186 * since nfs_strategy on the vax cannot handle u-areas and page tables. 187 */ 188 void 189 vndstrategy(bp) 190 register struct buf *bp; 191 { 192 int unit = vndunit(bp->b_dev); 193 register struct vnd_softc *vnd = &vnd_softc[unit]; 194 register struct vndbuf *nbp; 195 register int bn, bsize, resid; 196 register caddr_t addr; 197 int sz, flags, error; 198 extern void vndiodone(); 199 200 #ifdef DEBUG 201 if (vnddebug & VDB_FOLLOW) 202 printf("vndstrategy(%x): unit %d\n", bp, unit); 203 #endif 204 if ((vnd->sc_flags & VNF_INITED) == 0) { 205 bp->b_error = ENXIO; 206 bp->b_flags |= B_ERROR; 207 biodone(bp); 208 return; 209 } 210 bn = bp->b_blkno; 211 sz = howmany(bp->b_bcount, DEV_BSIZE); 212 bp->b_resid = bp->b_bcount; 213 if (bn < 0 || bn + sz > vnd->sc_size) { 214 if (bn != vnd->sc_size) { 215 bp->b_error = EINVAL; 216 bp->b_flags |= B_ERROR; 217 } 218 biodone(bp); 219 return; 220 } 221 bn = dbtob(bn); 222 bsize = vnd->sc_vp->v_mount->mnt_stat.f_iosize; 223 addr = bp->b_data; 224 flags = bp->b_flags | B_CALL; 225 for (resid = bp->b_resid; resid; resid -= sz) { 226 struct vnode *vp; 227 daddr_t nbn; 228 int off, s, nra; 229 230 nra = 0; 231 error = VOP_BMAP(vnd->sc_vp, bn / bsize, &vp, &nbn, &nra); 232 if (error == 0 && (long)nbn == -1) 233 error = EIO; 234 #ifdef DEBUG 235 if (!dovndcluster) 236 nra = 0; 237 #endif 238 239 if (off = bn % bsize) 240 sz = bsize - off; 241 else 242 sz = (1 + nra) * bsize; 243 if (resid < sz) 244 sz = resid; 245 #ifdef DEBUG 246 if (vnddebug & VDB_IO) 247 printf("vndstrategy: vp %x/%x bn %x/%x sz %x\n", 248 vnd->sc_vp, vp, bn, nbn, sz); 249 #endif 250 251 nbp = getvndbuf(); 252 nbp->vb_buf.b_flags = flags; 253 nbp->vb_buf.b_bcount = sz; 254 nbp->vb_buf.b_bufsize = bp->b_bufsize; 255 nbp->vb_buf.b_error = 0; 256 if (vp->v_type == VBLK || vp->v_type == VCHR) 257 nbp->vb_buf.b_dev = vp->v_rdev; 258 else 259 nbp->vb_buf.b_dev = NODEV; 260 nbp->vb_buf.b_data = addr; 261 nbp->vb_buf.b_blkno = nbn + btodb(off); 262 nbp->vb_buf.b_proc = bp->b_proc; 263 nbp->vb_buf.b_iodone = vndiodone; 264 nbp->vb_buf.b_vp = vp; 265 nbp->vb_buf.b_rcred = vnd->sc_cred; /* XXX crdup? */ 266 nbp->vb_buf.b_wcred = vnd->sc_cred; /* XXX crdup? */ 267 nbp->vb_buf.b_dirtyoff = bp->b_dirtyoff; 268 nbp->vb_buf.b_dirtyend = bp->b_dirtyend; 269 nbp->vb_buf.b_validoff = bp->b_validoff; 270 nbp->vb_buf.b_validend = bp->b_validend; 271 272 /* save a reference to the old buffer */ 273 nbp->vb_obp = bp; 274 275 /* 276 * If there was an error or a hole in the file...punt. 277 * Note that we deal with this after the nbp allocation. 278 * This ensures that we properly clean up any operations 279 * that we have already fired off. 280 * 281 * XXX we could deal with holes here but it would be 282 * a hassle (in the write case). 283 */ 284 if (error) { 285 nbp->vb_buf.b_error = error; 286 nbp->vb_buf.b_flags |= B_ERROR; 287 bp->b_resid -= (resid - sz); 288 biodone(&nbp->vb_buf); 289 return; 290 } 291 /* 292 * Just sort by block number 293 */ 294 nbp->vb_buf.b_cylin = nbp->vb_buf.b_blkno; 295 s = splbio(); 296 disksort(&vnd->sc_tab, &nbp->vb_buf); 297 if (vnd->sc_tab.b_active < vnd->sc_maxactive) { 298 vnd->sc_tab.b_active++; 299 vndstart(vnd); 300 } 301 splx(s); 302 bn += sz; 303 addr += sz; 304 } 305 } 306 307 /* 308 * Feed requests sequentially. 309 * We do it this way to keep from flooding NFS servers if we are connected 310 * to an NFS file. This places the burden on the client rather than the 311 * server. 312 */ 313 void 314 vndstart(vnd) 315 register struct vnd_softc *vnd; 316 { 317 register struct buf *bp; 318 319 /* 320 * Dequeue now since lower level strategy routine might 321 * queue using same links 322 */ 323 bp = vnd->sc_tab.b_actf; 324 vnd->sc_tab.b_actf = bp->b_actf; 325 #ifdef DEBUG 326 if (vnddebug & VDB_IO) 327 printf("vndstart(%d): bp %x vp %x blkno %x addr %x cnt %x\n", 328 vnd-vnd_softc, bp, bp->b_vp, bp->b_blkno, bp->b_data, 329 bp->b_bcount); 330 #endif 331 if ((bp->b_flags & B_READ) == 0) 332 bp->b_vp->v_numoutput++; 333 VOP_STRATEGY(bp); 334 } 335 336 void 337 vndiodone(vbp) 338 register struct vndbuf *vbp; 339 { 340 register struct buf *pbp = vbp->vb_obp; 341 register struct vnd_softc *vnd = &vnd_softc[vndunit(pbp->b_dev)]; 342 int s; 343 344 s = splbio(); 345 #ifdef DEBUG 346 if (vnddebug & VDB_IO) 347 printf("vndiodone(%d): vbp %x vp %x blkno %x addr %x cnt %x\n", 348 vnd-vnd_softc, vbp, vbp->vb_buf.b_vp, vbp->vb_buf.b_blkno, 349 vbp->vb_buf.b_data, vbp->vb_buf.b_bcount); 350 #endif 351 if (vbp->vb_buf.b_error) { 352 #ifdef DEBUG 353 if (vnddebug & VDB_IO) 354 printf("vndiodone: vbp %x error %d\n", vbp, 355 vbp->vb_buf.b_error); 356 #endif 357 pbp->b_flags |= B_ERROR; 358 pbp->b_error = biowait(&vbp->vb_buf); 359 } 360 pbp->b_resid -= vbp->vb_buf.b_bcount; 361 putvndbuf(vbp); 362 if (pbp->b_resid == 0) { 363 #ifdef DEBUG 364 if (vnddebug & VDB_IO) 365 printf("vndiodone: pbp %x iodone\n", pbp); 366 #endif 367 biodone(pbp); 368 } 369 if (vnd->sc_tab.b_actf) 370 vndstart(vnd); 371 else 372 vnd->sc_tab.b_active--; 373 splx(s); 374 } 375 376 /* ARGSUSED */ 377 int 378 vndioctl(dev, cmd, data, flag, p) 379 dev_t dev; 380 u_long cmd; 381 caddr_t data; 382 int flag; 383 struct proc *p; 384 { 385 int unit = vndunit(dev); 386 register struct vnd_softc *vnd; 387 struct vnd_ioctl *vio; 388 struct vattr vattr; 389 struct nameidata nd; 390 int error; 391 392 #ifdef DEBUG 393 if (vnddebug & VDB_FOLLOW) 394 printf("vndioctl(%x, %lx, %x, %x, %x): unit %d\n", 395 dev, cmd, data, flag, p, unit); 396 #endif 397 error = suser(p->p_ucred, &p->p_acflag); 398 if (error) 399 return (error); 400 if (unit >= numvnd) 401 return (ENXIO); 402 403 vnd = &vnd_softc[unit]; 404 vio = (struct vnd_ioctl *)data; 405 switch (cmd) { 406 407 case VNDIOCSET: 408 if (vnd->sc_flags & VNF_INITED) 409 return(EBUSY); 410 /* 411 * Always open for read and write. 412 * This is probably bogus, but it lets vn_open() 413 * weed out directories, sockets, etc. so we don't 414 * have to worry about them. 415 */ 416 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, vio->vnd_file, p); 417 if (error = vn_open(&nd, FREAD|FWRITE, 0)) 418 return(error); 419 if (error = VOP_GETATTR(nd.ni_vp, &vattr, p->p_ucred, p)) { 420 VOP_UNLOCK(nd.ni_vp); 421 (void) vn_close(nd.ni_vp, FREAD|FWRITE, p->p_ucred, p); 422 return(error); 423 } 424 VOP_UNLOCK(nd.ni_vp); 425 vnd->sc_vp = nd.ni_vp; 426 vnd->sc_size = btodb(vattr.va_size); /* note truncation */ 427 if (error = vndsetcred(vnd, p->p_ucred)) { 428 (void) vn_close(nd.ni_vp, FREAD|FWRITE, p->p_ucred, p); 429 return(error); 430 } 431 vndthrottle(vnd, vnd->sc_vp); 432 vio->vnd_size = dbtob(vnd->sc_size); 433 vnd->sc_flags |= VNF_INITED; 434 #ifdef DEBUG 435 if (vnddebug & VDB_INIT) 436 printf("vndioctl: SET vp %x size %x\n", 437 vnd->sc_vp, vnd->sc_size); 438 #endif 439 break; 440 441 case VNDIOCCLR: 442 if ((vnd->sc_flags & VNF_INITED) == 0) 443 return(ENXIO); 444 vndclear(vnd); 445 #ifdef DEBUG 446 if (vnddebug & VDB_INIT) 447 printf("vndioctl: CLRed\n"); 448 #endif 449 break; 450 451 default: 452 return(ENOTTY); 453 } 454 return(0); 455 } 456 457 /* 458 * Duplicate the current processes' credentials. Since we are called only 459 * as the result of a SET ioctl and only root can do that, any future access 460 * to this "disk" is essentially as root. Note that credentials may change 461 * if some other uid can write directly to the mapped file (NFS). 462 */ 463 int 464 vndsetcred(vnd, cred) 465 register struct vnd_softc *vnd; 466 struct ucred *cred; 467 { 468 struct uio auio; 469 struct iovec aiov; 470 char *tmpbuf; 471 int error; 472 473 vnd->sc_cred = crdup(cred); 474 tmpbuf = malloc(DEV_BSIZE, M_TEMP, M_WAITOK); 475 476 /* XXX: Horrible kludge to establish credentials for NFS */ 477 aiov.iov_base = tmpbuf; 478 aiov.iov_len = min(DEV_BSIZE, dbtob(vnd->sc_size)); 479 auio.uio_iov = &aiov; 480 auio.uio_iovcnt = 1; 481 auio.uio_offset = 0; 482 auio.uio_rw = UIO_READ; 483 auio.uio_segflg = UIO_SYSSPACE; 484 auio.uio_resid = aiov.iov_len; 485 error = VOP_READ(vnd->sc_vp, &auio, 0, vnd->sc_cred); 486 487 free(tmpbuf, M_TEMP); 488 return (error); 489 } 490 491 /* 492 * Set maxactive based on FS type 493 */ 494 void 495 vndthrottle(vnd, vp) 496 register struct vnd_softc *vnd; 497 struct vnode *vp; 498 { 499 #ifdef NFSCLIENT 500 extern int (**nfsv2_vnodeop_p)(); 501 502 if (vp->v_op == nfsv2_vnodeop_p) 503 vnd->sc_maxactive = 2; 504 else 505 #endif 506 vnd->sc_maxactive = 8; 507 508 if (vnd->sc_maxactive < 1) 509 vnd->sc_maxactive = 1; 510 } 511 512 void 513 vndshutdown() 514 { 515 register struct vnd_softc *vnd; 516 517 for (vnd = &vnd_softc[0]; vnd < &vnd_softc[numvnd]; vnd++) 518 if (vnd->sc_flags & VNF_INITED) 519 vndclear(vnd); 520 } 521 522 void 523 vndclear(vnd) 524 register struct vnd_softc *vnd; 525 { 526 register struct vnode *vp = vnd->sc_vp; 527 struct proc *p = curproc; /* XXX */ 528 529 #ifdef DEBUG 530 if (vnddebug & VDB_FOLLOW) 531 printf("vndclear(%x): vp %x\n", vp); 532 #endif 533 vnd->sc_flags &= ~VNF_INITED; 534 if (vp == (struct vnode *)0) 535 panic("vndioctl: null vp"); 536 (void) vn_close(vp, FREAD|FWRITE, vnd->sc_cred, p); 537 crfree(vnd->sc_cred); 538 vnd->sc_vp = (struct vnode *)0; 539 vnd->sc_cred = (struct ucred *)0; 540 vnd->sc_size = 0; 541 } 542 543 int 544 vndsize(dev) 545 dev_t dev; 546 { 547 int unit = vndunit(dev); 548 register struct vnd_softc *vnd = &vnd_softc[unit]; 549 550 if (unit >= numvnd || (vnd->sc_flags & VNF_INITED) == 0) 551 return(-1); 552 return(vnd->sc_size); 553 } 554 555 int 556 vnddump(dev) 557 dev_t dev; 558 { 559 560 return(ENXIO); 561 } 562 #endif 563