xref: /netbsd-src/sys/dev/vnd.c (revision ce0bb6e8d2e560ecacbe865a848624f94498063b)
1 /*	$NetBSD: vnd.c,v 1.18 1995/02/27 19:31:00 cgd Exp $	*/
2 
3 /*
4  * Copyright (c) 1988 University of Utah.
5  * Copyright (c) 1990, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * the Systems Programming Group of the University of Utah Computer
10  * Science Department.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  * 3. All advertising materials mentioning features or use of this software
21  *    must display the following acknowledgement:
22  *	This product includes software developed by the University of
23  *	California, Berkeley and its contributors.
24  * 4. Neither the name of the University nor the names of its contributors
25  *    may be used to endorse or promote products derived from this software
26  *    without specific prior written permission.
27  *
28  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
29  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
32  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38  * SUCH DAMAGE.
39  *
40  * from: Utah $Hdr: vn.c 1.13 94/04/02$
41  *
42  *	@(#)vn.c	8.6 (Berkeley) 4/1/94
43  */
44 
45 /*
46  * Vnode disk driver.
47  *
48  * Block/character interface to a vnode.  Allows one to treat a file
49  * as a disk (e.g. build a filesystem in it, mount it, etc.).
50  *
51  * NOTE 1: This uses the VOP_BMAP/VOP_STRATEGY interface to the vnode
52  * instead of a simple VOP_RDWR.  We do this to avoid distorting the
53  * local buffer cache.
54  *
55  * NOTE 2: There is a security issue involved with this driver.
56  * Once mounted all access to the contents of the "mapped" file via
57  * the special file is controlled by the permissions on the special
58  * file, the protection of the mapped file is ignored (effectively,
59  * by using root credentials in all transactions).
60  *
61  * NOTE 3: Doesn't interact with leases, should it?
62  */
63 #include "vnd.h"
64 #if NVND > 0
65 
66 #include <sys/param.h>
67 #include <sys/systm.h>
68 #include <sys/namei.h>
69 #include <sys/proc.h>
70 #include <sys/errno.h>
71 #include <sys/dkstat.h>
72 #include <sys/buf.h>
73 #include <sys/malloc.h>
74 #include <sys/ioctl.h>
75 #include <sys/disklabel.h>
76 #include <sys/mount.h>
77 #include <sys/vnode.h>
78 #include <sys/file.h>
79 #include <sys/uio.h>
80 
81 #include <miscfs/specfs/specdev.h>
82 
83 #include <dev/vndioctl.h>
84 
#ifdef DEBUG
/*
 * Debug knobs.  vnddebug is a bitmask of the VDB_* values; each bit
 * enables one family of trace printf()s in this file.
 */
#define VDB_FOLLOW	0x01	/* trace entry into the driver routines */
#define VDB_INIT	0x02	/* trace VNDIOCSET / VNDIOCCLR */
#define VDB_IO		0x04	/* trace the individual transfers */
int vnddebug = 0x00;

/* When zero, vndstrategy() ignores the read-ahead count from VOP_BMAP. */
int dovndcluster = 1;
#endif
92 
/*
 * Sort key stored in each fragment before it is handed to disksort();
 * note that it overlays the buf's b_resid field.
 */
#define	b_cylin		b_resid

/* Map a dev_t onto its vnd unit number. */
#define	vndunit(x)	DISKUNIT(x)
96 
97 struct vndbuf {
98 	struct buf	vb_buf;
99 	struct buf	*vb_obp;
100 };
101 
/* Allocate a fragment descriptor (malloc type M_DEVBUF, M_WAITOK). */
#define	getvndbuf()							\
	((struct vndbuf *)malloc(sizeof(struct vndbuf), M_DEVBUF, M_WAITOK))

/* Give back a fragment descriptor obtained from getvndbuf(). */
#define	putvndbuf(vbp)							\
	free((caddr_t)(vbp), M_DEVBUF)
106 
107 struct vnd_softc {
108 	int		 sc_flags;	/* flags */
109 	size_t		 sc_size;	/* size of vnd */
110 	struct vnode	*sc_vp;		/* vnode */
111 	struct ucred	*sc_cred;	/* credentials */
112 	int		 sc_maxactive;	/* max # of active requests */
113 	struct buf	 sc_tab;	/* transfer queue */
114 };
115 
/*
 * Bits kept in vnd_softc.sc_flags.
 */
#define	VNF_ALIVE	0x01	/* not referenced by the code in this file */
#define	VNF_INITED	0x02	/* set by VNDIOCSET, cleared by vndclear() */
119 
/*
 * Storage for the per-unit state.  By default the array is allocated by
 * vndattach(); the disabled branch provides a compile-time sized table.
 * Both branches must define the same name, vnd_softc, because that is
 * what every routine in this file indexes.
 */
#if 0	/* if you need static allocation */
struct vnd_softc vnd_softc[NVND];
int numvnd = NVND;
#else
struct vnd_softc *vnd_softc;
int numvnd;
#endif
127 
128 void	vndclear __P((struct vnd_softc *));
129 void	vndstart __P((struct vnd_softc *));
130 int	vndsetcred __P((struct vnd_softc *, struct ucred *));
131 void	vndthrottle __P((struct vnd_softc *, struct vnode *));
132 
133 void
134 vndattach(num)
135 	int num;
136 {
137 	char *mem;
138 	register u_long size;
139 
140 	if (num <= 0)
141 		return;
142 	size = num * sizeof(struct vnd_softc);
143 	mem = malloc(size, M_DEVBUF, M_NOWAIT);
144 	if (mem == NULL) {
145 		printf("WARNING: no memory for vnode disks\n");
146 		return;
147 	}
148 	bzero(mem, size);
149 	vnd_softc = (struct vnd_softc *)mem;
150 	numvnd = num;
151 }
152 
153 int
154 vndopen(dev, flags, mode, p)
155 	dev_t dev;
156 	int flags, mode;
157 	struct proc *p;
158 {
159 	int unit = vndunit(dev);
160 
161 #ifdef DEBUG
162 	if (vnddebug & VDB_FOLLOW)
163 		printf("vndopen(%x, %x, %x, %x)\n", dev, flags, mode, p);
164 #endif
165 	if (unit >= numvnd)
166 		return(ENXIO);
167 	return(0);
168 }
169 
/*
 * Close entry point.  Nothing is torn down here; the routine only emits
 * an optional trace line and always returns 0.
 */
int
vndclose(dev, flags, mode, p)
	dev_t dev;
	int flags, mode;
	struct proc *p;
{

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndclose(%x, %x, %x, %x)\n", dev, flags, mode, p);
#endif
	return (0);
}
182 
183 /*
184  * Break the request into bsize pieces and submit using VOP_BMAP/VOP_STRATEGY.
185  * Note that this driver can only be used for swapping over NFS on the hp
186  * since nfs_strategy on the vax cannot handle u-areas and page tables.
187  */
188 void
189 vndstrategy(bp)
190 	register struct buf *bp;
191 {
192 	int unit = vndunit(bp->b_dev);
193 	register struct vnd_softc *vnd = &vnd_softc[unit];
194 	register struct vndbuf *nbp;
195 	register int bn, bsize, resid;
196 	register caddr_t addr;
197 	int sz, flags, error;
198 	extern void vndiodone();
199 
200 #ifdef DEBUG
201 	if (vnddebug & VDB_FOLLOW)
202 		printf("vndstrategy(%x): unit %d\n", bp, unit);
203 #endif
204 	if ((vnd->sc_flags & VNF_INITED) == 0) {
205 		bp->b_error = ENXIO;
206 		bp->b_flags |= B_ERROR;
207 		biodone(bp);
208 		return;
209 	}
210 	bn = bp->b_blkno;
211 	sz = howmany(bp->b_bcount, DEV_BSIZE);
212 	bp->b_resid = bp->b_bcount;
213 	if (bn < 0 || bn + sz > vnd->sc_size) {
214 		if (bn != vnd->sc_size) {
215 			bp->b_error = EINVAL;
216 			bp->b_flags |= B_ERROR;
217 		}
218 		biodone(bp);
219 		return;
220 	}
221 	bn = dbtob(bn);
222  	bsize = vnd->sc_vp->v_mount->mnt_stat.f_iosize;
223 	addr = bp->b_data;
224 	flags = bp->b_flags | B_CALL;
225 	for (resid = bp->b_resid; resid; resid -= sz) {
226 		struct vnode *vp;
227 		daddr_t nbn;
228 		int off, s, nra;
229 
230 		nra = 0;
231 		error = VOP_BMAP(vnd->sc_vp, bn / bsize, &vp, &nbn, &nra);
232 		if (error == 0 && (long)nbn == -1)
233 			error = EIO;
234 #ifdef DEBUG
235 		if (!dovndcluster)
236 			nra = 0;
237 #endif
238 
239 		if (off = bn % bsize)
240 			sz = bsize - off;
241 		else
242 			sz = (1 + nra) * bsize;
243 		if (resid < sz)
244 			sz = resid;
245 #ifdef DEBUG
246 		if (vnddebug & VDB_IO)
247 			printf("vndstrategy: vp %x/%x bn %x/%x sz %x\n",
248 			       vnd->sc_vp, vp, bn, nbn, sz);
249 #endif
250 
251 		nbp = getvndbuf();
252 		nbp->vb_buf.b_flags = flags;
253 		nbp->vb_buf.b_bcount = sz;
254 		nbp->vb_buf.b_bufsize = bp->b_bufsize;
255 		nbp->vb_buf.b_error = 0;
256 		if (vp->v_type == VBLK || vp->v_type == VCHR)
257 			nbp->vb_buf.b_dev = vp->v_rdev;
258 		else
259 			nbp->vb_buf.b_dev = NODEV;
260 		nbp->vb_buf.b_data = addr;
261 		nbp->vb_buf.b_blkno = nbn + btodb(off);
262 		nbp->vb_buf.b_proc = bp->b_proc;
263 		nbp->vb_buf.b_iodone = vndiodone;
264 		nbp->vb_buf.b_vp = vp;
265 		nbp->vb_buf.b_rcred = vnd->sc_cred;	/* XXX crdup? */
266 		nbp->vb_buf.b_wcred = vnd->sc_cred;	/* XXX crdup? */
267 		nbp->vb_buf.b_dirtyoff = bp->b_dirtyoff;
268 		nbp->vb_buf.b_dirtyend = bp->b_dirtyend;
269 		nbp->vb_buf.b_validoff = bp->b_validoff;
270 		nbp->vb_buf.b_validend = bp->b_validend;
271 
272 		/* save a reference to the old buffer */
273 		nbp->vb_obp = bp;
274 
275 		/*
276 		 * If there was an error or a hole in the file...punt.
277 		 * Note that we deal with this after the nbp allocation.
278 		 * This ensures that we properly clean up any operations
279 		 * that we have already fired off.
280 		 *
281 		 * XXX we could deal with holes here but it would be
282 		 * a hassle (in the write case).
283 		 */
284 		if (error) {
285 			nbp->vb_buf.b_error = error;
286 			nbp->vb_buf.b_flags |= B_ERROR;
287 			bp->b_resid -= (resid - sz);
288 			biodone(&nbp->vb_buf);
289 			return;
290 		}
291 		/*
292 		 * Just sort by block number
293 		 */
294 		nbp->vb_buf.b_cylin = nbp->vb_buf.b_blkno;
295 		s = splbio();
296 		disksort(&vnd->sc_tab, &nbp->vb_buf);
297 		if (vnd->sc_tab.b_active < vnd->sc_maxactive) {
298 			vnd->sc_tab.b_active++;
299 			vndstart(vnd);
300 		}
301 		splx(s);
302 		bn += sz;
303 		addr += sz;
304 	}
305 }
306 
307 /*
308  * Feed requests sequentially.
309  * We do it this way to keep from flooding NFS servers if we are connected
310  * to an NFS file.  This places the burden on the client rather than the
311  * server.
312  */
313 void
314 vndstart(vnd)
315 	register struct vnd_softc *vnd;
316 {
317 	register struct buf *bp;
318 
319 	/*
320 	 * Dequeue now since lower level strategy routine might
321 	 * queue using same links
322 	 */
323 	bp = vnd->sc_tab.b_actf;
324 	vnd->sc_tab.b_actf = bp->b_actf;
325 #ifdef DEBUG
326 	if (vnddebug & VDB_IO)
327 		printf("vndstart(%d): bp %x vp %x blkno %x addr %x cnt %x\n",
328 		    vnd-vnd_softc, bp, bp->b_vp, bp->b_blkno, bp->b_data,
329 		    bp->b_bcount);
330 #endif
331 	if ((bp->b_flags & B_READ) == 0)
332 		bp->b_vp->v_numoutput++;
333 	VOP_STRATEGY(bp);
334 }
335 
336 void
337 vndiodone(vbp)
338 	register struct vndbuf *vbp;
339 {
340 	register struct buf *pbp = vbp->vb_obp;
341 	register struct vnd_softc *vnd = &vnd_softc[vndunit(pbp->b_dev)];
342 	int s;
343 
344 	s = splbio();
345 #ifdef DEBUG
346 	if (vnddebug & VDB_IO)
347 		printf("vndiodone(%d): vbp %x vp %x blkno %x addr %x cnt %x\n",
348 		    vnd-vnd_softc, vbp, vbp->vb_buf.b_vp, vbp->vb_buf.b_blkno,
349 		    vbp->vb_buf.b_data, vbp->vb_buf.b_bcount);
350 #endif
351 	if (vbp->vb_buf.b_error) {
352 #ifdef DEBUG
353 		if (vnddebug & VDB_IO)
354 			printf("vndiodone: vbp %x error %d\n", vbp,
355 			    vbp->vb_buf.b_error);
356 #endif
357 		pbp->b_flags |= B_ERROR;
358 		pbp->b_error = biowait(&vbp->vb_buf);
359 	}
360 	pbp->b_resid -= vbp->vb_buf.b_bcount;
361 	putvndbuf(vbp);
362 	if (pbp->b_resid == 0) {
363 #ifdef DEBUG
364 		if (vnddebug & VDB_IO)
365 			printf("vndiodone: pbp %x iodone\n", pbp);
366 #endif
367 		biodone(pbp);
368 	}
369 	if (vnd->sc_tab.b_actf)
370 		vndstart(vnd);
371 	else
372 		vnd->sc_tab.b_active--;
373 	splx(s);
374 }
375 
376 /* ARGSUSED */
377 int
378 vndioctl(dev, cmd, data, flag, p)
379 	dev_t dev;
380 	u_long cmd;
381 	caddr_t data;
382 	int flag;
383 	struct proc *p;
384 {
385 	int unit = vndunit(dev);
386 	register struct vnd_softc *vnd;
387 	struct vnd_ioctl *vio;
388 	struct vattr vattr;
389 	struct nameidata nd;
390 	int error;
391 
392 #ifdef DEBUG
393 	if (vnddebug & VDB_FOLLOW)
394 		printf("vndioctl(%x, %lx, %x, %x, %x): unit %d\n",
395 		    dev, cmd, data, flag, p, unit);
396 #endif
397 	error = suser(p->p_ucred, &p->p_acflag);
398 	if (error)
399 		return (error);
400 	if (unit >= numvnd)
401 		return (ENXIO);
402 
403 	vnd = &vnd_softc[unit];
404 	vio = (struct vnd_ioctl *)data;
405 	switch (cmd) {
406 
407 	case VNDIOCSET:
408 		if (vnd->sc_flags & VNF_INITED)
409 			return(EBUSY);
410 		/*
411 		 * Always open for read and write.
412 		 * This is probably bogus, but it lets vn_open()
413 		 * weed out directories, sockets, etc. so we don't
414 		 * have to worry about them.
415 		 */
416 		NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, vio->vnd_file, p);
417 		if (error = vn_open(&nd, FREAD|FWRITE, 0))
418 			return(error);
419 		if (error = VOP_GETATTR(nd.ni_vp, &vattr, p->p_ucred, p)) {
420 			VOP_UNLOCK(nd.ni_vp);
421 			(void) vn_close(nd.ni_vp, FREAD|FWRITE, p->p_ucred, p);
422 			return(error);
423 		}
424 		VOP_UNLOCK(nd.ni_vp);
425 		vnd->sc_vp = nd.ni_vp;
426 		vnd->sc_size = btodb(vattr.va_size);	/* note truncation */
427 		if (error = vndsetcred(vnd, p->p_ucred)) {
428 			(void) vn_close(nd.ni_vp, FREAD|FWRITE, p->p_ucred, p);
429 			return(error);
430 		}
431 		vndthrottle(vnd, vnd->sc_vp);
432 		vio->vnd_size = dbtob(vnd->sc_size);
433 		vnd->sc_flags |= VNF_INITED;
434 #ifdef DEBUG
435 		if (vnddebug & VDB_INIT)
436 			printf("vndioctl: SET vp %x size %x\n",
437 			    vnd->sc_vp, vnd->sc_size);
438 #endif
439 		break;
440 
441 	case VNDIOCCLR:
442 		if ((vnd->sc_flags & VNF_INITED) == 0)
443 			return(ENXIO);
444 		vndclear(vnd);
445 #ifdef DEBUG
446 		if (vnddebug & VDB_INIT)
447 			printf("vndioctl: CLRed\n");
448 #endif
449 		break;
450 
451 	default:
452 		return(ENOTTY);
453 	}
454 	return(0);
455 }
456 
457 /*
458  * Duplicate the current processes' credentials.  Since we are called only
459  * as the result of a SET ioctl and only root can do that, any future access
460  * to this "disk" is essentially as root.  Note that credentials may change
461  * if some other uid can write directly to the mapped file (NFS).
462  */
463 int
464 vndsetcred(vnd, cred)
465 	register struct vnd_softc *vnd;
466 	struct ucred *cred;
467 {
468 	struct uio auio;
469 	struct iovec aiov;
470 	char *tmpbuf;
471 	int error;
472 
473 	vnd->sc_cred = crdup(cred);
474 	tmpbuf = malloc(DEV_BSIZE, M_TEMP, M_WAITOK);
475 
476 	/* XXX: Horrible kludge to establish credentials for NFS */
477 	aiov.iov_base = tmpbuf;
478 	aiov.iov_len = min(DEV_BSIZE, dbtob(vnd->sc_size));
479 	auio.uio_iov = &aiov;
480 	auio.uio_iovcnt = 1;
481 	auio.uio_offset = 0;
482 	auio.uio_rw = UIO_READ;
483 	auio.uio_segflg = UIO_SYSSPACE;
484 	auio.uio_resid = aiov.iov_len;
485 	error = VOP_READ(vnd->sc_vp, &auio, 0, vnd->sc_cred);
486 
487 	free(tmpbuf, M_TEMP);
488 	return (error);
489 }
490 
491 /*
492  * Set maxactive based on FS type
493  */
494 void
495 vndthrottle(vnd, vp)
496 	register struct vnd_softc *vnd;
497 	struct vnode *vp;
498 {
499 #ifdef NFSCLIENT
500 	extern int (**nfsv2_vnodeop_p)();
501 
502 	if (vp->v_op == nfsv2_vnodeop_p)
503 		vnd->sc_maxactive = 2;
504 	else
505 #endif
506 		vnd->sc_maxactive = 8;
507 
508 	if (vnd->sc_maxactive < 1)
509 		vnd->sc_maxactive = 1;
510 }
511 
512 void
513 vndshutdown()
514 {
515 	register struct vnd_softc *vnd;
516 
517 	for (vnd = &vnd_softc[0]; vnd < &vnd_softc[numvnd]; vnd++)
518 		if (vnd->sc_flags & VNF_INITED)
519 			vndclear(vnd);
520 }
521 
522 void
523 vndclear(vnd)
524 	register struct vnd_softc *vnd;
525 {
526 	register struct vnode *vp = vnd->sc_vp;
527 	struct proc *p = curproc;		/* XXX */
528 
529 #ifdef DEBUG
530 	if (vnddebug & VDB_FOLLOW)
531 		printf("vndclear(%x): vp %x\n", vp);
532 #endif
533 	vnd->sc_flags &= ~VNF_INITED;
534 	if (vp == (struct vnode *)0)
535 		panic("vndioctl: null vp");
536 	(void) vn_close(vp, FREAD|FWRITE, vnd->sc_cred, p);
537 	crfree(vnd->sc_cred);
538 	vnd->sc_vp = (struct vnode *)0;
539 	vnd->sc_cred = (struct ucred *)0;
540 	vnd->sc_size = 0;
541 }
542 
543 int
544 vndsize(dev)
545 	dev_t dev;
546 {
547 	int unit = vndunit(dev);
548 	register struct vnd_softc *vnd = &vnd_softc[unit];
549 
550 	if (unit >= numvnd || (vnd->sc_flags & VNF_INITED) == 0)
551 		return(-1);
552 	return(vnd->sc_size);
553 }
554 
/*
 * Dump entry point.  Dumping to a vnode disk is not implemented, so
 * every call is rejected with ENXIO.
 */
int
vnddump(dev)
	dev_t dev;
{
	return (ENXIO);
}
561 }
562 #endif
563