xref: /csrg-svn/sys/dev/vn.c (revision 49299)
141480Smckusick /*
241480Smckusick  * Copyright (c) 1988 University of Utah.
341480Smckusick  * Copyright (c) 1990 The Regents of the University of California.
441480Smckusick  * All rights reserved.
541480Smckusick  *
641480Smckusick  * This code is derived from software contributed to Berkeley by
741480Smckusick  * the Systems Programming Group of the University of Utah Computer
841480Smckusick  * Science Department.
941480Smckusick  *
1041480Smckusick  * %sccs.include.redist.c%
1141480Smckusick  *
12*49299Shibler  * from: Utah $Hdr: vn.c 1.1 91/04/30$
1341480Smckusick  *
14*49299Shibler  *	@(#)vn.c	7.5 (Berkeley) 05/07/91
1541480Smckusick  */
1641480Smckusick 
1741480Smckusick /*
18*49299Shibler  * Vnode disk driver.
1941480Smckusick  *
20*49299Shibler  * Block/character interface to a vnode.  Allows one to treat a file
21*49299Shibler  * as a disk (e.g. build a filesystem in it, mount it, etc.).
2241480Smckusick  *
23*49299Shibler  * NOTE 1: This uses the VOP_BMAP/VOP_STRATEGY interface to the vnode
24*49299Shibler  * instead of a simple VOP_RDWR.  We do this to avoid distorting the
25*49299Shibler  * local buffer cache.
26*49299Shibler  *
 * NOTE 2: There is a security issue involved with this driver.
 * Once mounted, all access to the contents of the "mapped" file via
 * the special file is controlled by the permissions on the special
 * file; the protection of the mapped file is ignored (effectively,
 * by using root credentials in all transactions).
3241480Smckusick  */
33*49299Shibler #include "vn.h"
34*49299Shibler #if NVN > 0
3541480Smckusick 
3645788Sbostic #include "sys/param.h"
3745788Sbostic #include "sys/systm.h"
38*49299Shibler #include "sys/namei.h"
39*49299Shibler #include "sys/proc.h"
4045788Sbostic #include "sys/errno.h"
4145788Sbostic #include "sys/dkstat.h"
42*49299Shibler #include "sys/buf.h"
43*49299Shibler #include "sys/malloc.h"
4445788Sbostic #include "sys/ioctl.h"
45*49299Shibler #include "sys/mount.h"
4645788Sbostic #include "sys/vnode.h"
47*49299Shibler #include "sys/specdev.h"
4845788Sbostic #include "sys/file.h"
4945788Sbostic #include "sys/uio.h"
5041480Smckusick 
51*49299Shibler #include "vnioctl.h"
5241480Smckusick 
#ifdef DEBUG
/*
 * Debugging output is enabled by setting bits in vndebug.
 */
#define VDB_FOLLOW	0x01	/* trace calls to the entry points */
#define VDB_INIT	0x02	/* report VNIOCSET/VNIOCCLR results */
#define VDB_IO		0x04	/* report individual transfers */
int vndebug = 0;
#endif
5941480Smckusick 
60*49299Shibler struct	buf vnbuf[NVN];
61*49299Shibler struct	buf vntab[NVN];
6241480Smckusick 
6341480Smckusick #define b_cylin	b_resid
6441480Smckusick 
65*49299Shibler #define	vnunit(x)	((minor(x) >> 3) & 0x7)	/* for consistency */
6641480Smckusick 
67*49299Shibler #define	getvnbuf()	\
6841480Smckusick 	((struct buf *)malloc(sizeof(struct buf), M_DEVBUF, M_WAITOK))
69*49299Shibler #define putvnbuf(bp)	\
7041480Smckusick 	free((caddr_t)(bp), M_DEVBUF)
7141480Smckusick 
72*49299Shibler struct vn_softc {
7341480Smckusick 	int		 sc_flags;	/* flags */
74*49299Shibler 	size_t		 sc_size;	/* size of vn */
7541480Smckusick 	struct vnode	*sc_vp;		/* vnode */
7641480Smckusick 	struct ucred	*sc_cred;	/* credentials */
7741480Smckusick 	int		 sc_maxactive;	/* max # of active requests */
78*49299Shibler } vn_softc[NVN];
7941480Smckusick 
8041480Smckusick /* sc_flags */
81*49299Shibler #define	VNF_ALIVE	0x01
82*49299Shibler #define VNF_INITED	0x02
8341480Smckusick 
84*49299Shibler int
85*49299Shibler vnopen(dev, flags, mode, p)
8641480Smckusick 	dev_t dev;
87*49299Shibler 	int flags, mode;
88*49299Shibler 	struct proc *p;
8941480Smckusick {
90*49299Shibler 	int unit = vnunit(dev);
9141480Smckusick 
9241480Smckusick #ifdef DEBUG
93*49299Shibler 	if (vndebug & VDB_FOLLOW)
94*49299Shibler 		printf("vnopen(%x, %x, %x, %x)\n", dev, flags, mode, p);
9541480Smckusick #endif
96*49299Shibler 	if (unit >= NVN)
9741480Smckusick 		return(ENXIO);
9841480Smckusick 	return(0);
9941480Smckusick }
10041480Smckusick 
10141480Smckusick /*
10241480Smckusick  * Break the request into bsize pieces and submit using VOP_BMAP/VOP_STRATEGY.
10341480Smckusick  * Note that this driver can only be used for swapping over NFS on the hp
10441480Smckusick  * since nfs_strategy on the vax cannot handle u-areas and page tables.
10541480Smckusick  */
106*49299Shibler vnstrategy(bp)
10741480Smckusick 	register struct buf *bp;
10841480Smckusick {
109*49299Shibler 	int unit = vnunit(bp->b_dev);
110*49299Shibler 	register struct vn_softc *vn = &vn_softc[unit];
11141480Smckusick 	register struct buf *nbp;
11241480Smckusick 	register int bn, bsize, resid;
11341480Smckusick 	register caddr_t addr;
11441480Smckusick 	int sz, flags;
115*49299Shibler 	extern int vniodone();
11641480Smckusick 
11741480Smckusick #ifdef DEBUG
118*49299Shibler 	if (vndebug & VDB_FOLLOW)
119*49299Shibler 		printf("vnstrategy(%x): unit %d\n", bp, unit);
12041480Smckusick #endif
121*49299Shibler 	if ((vn->sc_flags & VNF_INITED) == 0) {
12241480Smckusick 		bp->b_error = ENXIO;
12341480Smckusick 		bp->b_flags |= B_ERROR;
124*49299Shibler 		biodone(bp);
12541480Smckusick 		return;
12641480Smckusick 	}
12741480Smckusick 	bn = bp->b_blkno;
12841480Smckusick 	sz = howmany(bp->b_bcount, DEV_BSIZE);
12941480Smckusick 	bp->b_resid = bp->b_bcount;
130*49299Shibler 	if (bn < 0 || bn + sz > vn->sc_size) {
131*49299Shibler 		if (bn != vn->sc_size) {
13241480Smckusick 			bp->b_error = EINVAL;
13341480Smckusick 			bp->b_flags |= B_ERROR;
13441480Smckusick 		}
135*49299Shibler 		biodone(bp);
13641480Smckusick 		return;
13741480Smckusick 	}
13841480Smckusick 	bn = dbtob(bn);
139*49299Shibler 	bsize = vn->sc_vp->v_mount->mnt_stat.f_bsize;
14041480Smckusick 	addr = bp->b_un.b_addr;
14141480Smckusick 	flags = bp->b_flags | B_CALL;
14241480Smckusick 	for (resid = bp->b_resid; resid; resid -= sz) {
14341480Smckusick 		struct vnode *vp;
14441480Smckusick 		daddr_t nbn;
14541480Smckusick 		int off, s;
14641480Smckusick 
147*49299Shibler 		nbp = getvnbuf();
14841480Smckusick 		off = bn % bsize;
14941480Smckusick 		sz = MIN(bsize - off, resid);
150*49299Shibler 		(void) VOP_BMAP(vn->sc_vp, bn / bsize, &vp, &nbn);
15141480Smckusick #ifdef DEBUG
152*49299Shibler 		if (vndebug & VDB_IO)
153*49299Shibler 			printf("vnstrategy: vp %x/%x bn %x/%x\n",
154*49299Shibler 			       vn->sc_vp, vp, bn, nbn);
15541480Smckusick #endif
15641480Smckusick 		nbp->b_flags = flags;
15741480Smckusick 		nbp->b_bcount = sz;
15841480Smckusick 		nbp->b_bufsize = bp->b_bufsize;
15941480Smckusick 		nbp->b_error = 0;
160*49299Shibler 		if (vp->v_type == VBLK || vp->v_type == VCHR)
161*49299Shibler 			nbp->b_dev = vp->v_rdev;
162*49299Shibler 		else
163*49299Shibler 			nbp->b_dev = NODEV;
16441480Smckusick 		nbp->b_un.b_addr = addr;
16541480Smckusick 		nbp->b_blkno = nbn + btodb(off);
16641480Smckusick 		nbp->b_proc = bp->b_proc;
167*49299Shibler 		nbp->b_iodone = vniodone;
16841480Smckusick 		nbp->b_vp = vp;
16941480Smckusick 		nbp->b_pfcent = (int) bp;	/* XXX */
17041480Smckusick 		/*
17141480Smckusick 		 * Just sort by block number
17241480Smckusick 		 */
17341480Smckusick 		nbp->b_cylin = nbp->b_blkno;
17441480Smckusick 		s = splbio();
175*49299Shibler 		disksort(&vntab[unit], nbp);
176*49299Shibler 		if (vntab[unit].b_active < vn->sc_maxactive) {
177*49299Shibler 			vntab[unit].b_active++;
178*49299Shibler 			vnstart(unit);
17941480Smckusick 		}
18041480Smckusick 		splx(s);
18141480Smckusick 		bn += sz;
18241480Smckusick 		addr += sz;
18341480Smckusick 	}
18441480Smckusick }
18541480Smckusick 
18641480Smckusick /*
18741480Smckusick  * Feed requests sequentially.
18841480Smckusick  * We do it this way to keep from flooding NFS servers if we are connected
18941480Smckusick  * to an NFS file.  This places the burden on the client rather than the
19041480Smckusick  * server.
19141480Smckusick  */
192*49299Shibler vnstart(unit)
19341480Smckusick {
194*49299Shibler 	register struct vn_softc *vn = &vn_softc[unit];
19541480Smckusick 	register struct buf *bp;
19641480Smckusick 
19741480Smckusick 	/*
19841480Smckusick 	 * Dequeue now since lower level strategy routine might
19941480Smckusick 	 * queue using same links
20041480Smckusick 	 */
201*49299Shibler 	bp = vntab[unit].b_actf;
202*49299Shibler 	vntab[unit].b_actf = bp->b_actf;
20341480Smckusick #ifdef DEBUG
204*49299Shibler 	if (vndebug & VDB_IO)
205*49299Shibler 		printf("vnstart(%d): bp %x vp %x blkno %x addr %x cnt %x\n",
20641480Smckusick 		       unit, bp, bp->b_vp, bp->b_blkno, bp->b_un.b_addr,
20741480Smckusick 		       bp->b_bcount);
20841480Smckusick #endif
20941480Smckusick 	VOP_STRATEGY(bp);
21041480Smckusick }
21141480Smckusick 
212*49299Shibler vniodone(bp)
21341480Smckusick 	register struct buf *bp;
21441480Smckusick {
21541480Smckusick 	register struct buf *pbp = (struct buf *)bp->b_pfcent;	/* XXX */
216*49299Shibler 	register int unit = vnunit(pbp->b_dev);
21741480Smckusick 	int s;
21841480Smckusick 
21941480Smckusick 	s = splbio();
22041480Smckusick #ifdef DEBUG
221*49299Shibler 	if (vndebug & VDB_IO)
222*49299Shibler 		printf("vniodone(%d): bp %x vp %x blkno %x addr %x cnt %x\n",
22341480Smckusick 		       unit, bp, bp->b_vp, bp->b_blkno, bp->b_un.b_addr,
22441480Smckusick 		       bp->b_bcount);
22541480Smckusick #endif
22641480Smckusick 	if (bp->b_error) {
22741480Smckusick #ifdef DEBUG
228*49299Shibler 		if (vndebug & VDB_IO)
229*49299Shibler 			printf("vniodone: bp %x error %d\n", bp, bp->b_error);
23041480Smckusick #endif
23141480Smckusick 		pbp->b_flags |= B_ERROR;
232*49299Shibler 		pbp->b_error = biowait(bp);
23341480Smckusick 	}
23441480Smckusick 	pbp->b_resid -= bp->b_bcount;
235*49299Shibler 	putvnbuf(bp);
23641480Smckusick 	if (pbp->b_resid == 0) {
23741480Smckusick #ifdef DEBUG
238*49299Shibler 		if (vndebug & VDB_IO)
239*49299Shibler 			printf("vniodone: pbp %x iodone\n", pbp);
24041480Smckusick #endif
241*49299Shibler 		biodone(pbp);
24241480Smckusick 	}
243*49299Shibler 	if (vntab[unit].b_actf)
244*49299Shibler 		vnstart(unit);
24541480Smckusick 	else
246*49299Shibler 		vntab[unit].b_active--;
24741480Smckusick 	splx(s);
24841480Smckusick }
24941480Smckusick 
250*49299Shibler vnread(dev, uio, flags, p)
25141480Smckusick 	dev_t dev;
25241480Smckusick 	struct uio *uio;
253*49299Shibler 	int flags;
254*49299Shibler 	struct proc *p;
25541480Smckusick {
256*49299Shibler 	register int unit = vnunit(dev);
25741480Smckusick 
25841480Smckusick #ifdef DEBUG
259*49299Shibler 	if (vndebug & VDB_FOLLOW)
260*49299Shibler 		printf("vnread(%x, %x, %x, %x)\n", dev, uio, flags, p);
26141480Smckusick #endif
262*49299Shibler 	return(physio(vnstrategy, &vnbuf[unit], dev, B_READ, minphys, uio));
26341480Smckusick }
26441480Smckusick 
265*49299Shibler vnwrite(dev, uio, flags, p)
26641480Smckusick 	dev_t dev;
26741480Smckusick 	struct uio *uio;
268*49299Shibler 	int flags;
269*49299Shibler 	struct proc *p;
27041480Smckusick {
271*49299Shibler 	register int unit = vnunit(dev);
27241480Smckusick 
27341480Smckusick #ifdef DEBUG
274*49299Shibler 	if (vndebug & VDB_FOLLOW)
275*49299Shibler 		printf("vnwrite(%x, %x, %x, %x)\n", dev, uio, flags, p);
27641480Smckusick #endif
277*49299Shibler 	return(physio(vnstrategy, &vnbuf[unit], dev, B_WRITE, minphys, uio));
27841480Smckusick }
27941480Smckusick 
28041480Smckusick /* ARGSUSED */
281*49299Shibler vnioctl(dev, cmd, data, flag, p)
28241480Smckusick 	dev_t dev;
28341480Smckusick 	u_long cmd;
28441480Smckusick 	caddr_t data;
28541480Smckusick 	int flag;
286*49299Shibler 	struct proc *p;
28741480Smckusick {
288*49299Shibler 	int unit = vnunit(dev);
289*49299Shibler 	register struct vn_softc *vn;
290*49299Shibler 	struct vn_ioctl *vio;
29141480Smckusick 	struct vattr vattr;
292*49299Shibler 	struct nameidata nd;
29341480Smckusick 	int error;
29441480Smckusick 
29541480Smckusick #ifdef DEBUG
296*49299Shibler 	if (vndebug & VDB_FOLLOW)
297*49299Shibler 		printf("vnioctl(%x, %x, %x, %x, %x): unit %d\n",
298*49299Shibler 		       dev, cmd, data, flag, p, unit);
29941480Smckusick #endif
300*49299Shibler 	error = suser(p->p_ucred, &p->p_acflag);
30141480Smckusick 	if (error)
30241480Smckusick 		return (error);
303*49299Shibler 	if (unit >= NVN)
30441480Smckusick 		return (ENXIO);
30541480Smckusick 
306*49299Shibler 	vn = &vn_softc[unit];
307*49299Shibler 	vio = (struct vn_ioctl *)data;
30841480Smckusick 	switch (cmd) {
30941480Smckusick 
310*49299Shibler 	case VNIOCSET:
311*49299Shibler 		if (vn->sc_flags & VNF_INITED)
31241480Smckusick 			return(EBUSY);
31341480Smckusick 		/*
31441480Smckusick 		 * Always open for read and write.
31541480Smckusick 		 * This is probably bogus, but it lets vn_open()
31641480Smckusick 		 * weed out directories, sockets, etc. so we don't
31741480Smckusick 		 * have to worry about them.
31841480Smckusick 		 */
319*49299Shibler 		nd.ni_segflg = UIO_USERSPACE;
320*49299Shibler 		nd.ni_dirp = vio->vn_file;
321*49299Shibler 		error = vn_open(&nd, p, FREAD|FWRITE, 0);
32241480Smckusick 		if (error)
32341480Smckusick 			return(error);
324*49299Shibler 		error = VOP_GETATTR(nd.ni_vp, &vattr, p->p_ucred, p);
32541480Smckusick 		if (error) {
326*49299Shibler 			vrele(nd.ni_vp);
32741480Smckusick 			return(error);
32841480Smckusick 		}
329*49299Shibler 		vn->sc_vp = nd.ni_vp;
330*49299Shibler 		vn->sc_size = btodb(vattr.va_size);	/* note truncation */
331*49299Shibler 		error = vnsetcred(vn, p->p_ucred);
33241480Smckusick 		if (error) {
333*49299Shibler 			vrele(vn->sc_vp);
33441480Smckusick 			return(error);
33541480Smckusick 		}
336*49299Shibler 		vnthrottle(vn, vn->sc_vp);
337*49299Shibler 		vio->vn_size = dbtob(vn->sc_size);
338*49299Shibler 		vn->sc_flags |= VNF_INITED;
33941480Smckusick #ifdef DEBUG
340*49299Shibler 		if (vndebug & VDB_INIT)
341*49299Shibler 			printf("vnioctl: SET vp %x size %x\n",
342*49299Shibler 			       vn->sc_vp, vn->sc_size);
34341480Smckusick #endif
34441480Smckusick 		break;
34541480Smckusick 
346*49299Shibler 	case VNIOCCLR:
347*49299Shibler 		if ((vn->sc_flags & VNF_INITED) == 0)
34841480Smckusick 			return(ENXIO);
349*49299Shibler 		vnclear(vn);
35041480Smckusick #ifdef DEBUG
351*49299Shibler 		if (vndebug & VDB_INIT)
352*49299Shibler 			printf("vnioctl: CLRed\n");
35341480Smckusick #endif
35441480Smckusick 		break;
35541480Smckusick 
35641480Smckusick 	default:
35741480Smckusick 		return(ENXIO);
35841480Smckusick 	}
35941480Smckusick 	return(0);
36041480Smckusick }
36141480Smckusick 
36241480Smckusick /*
36341480Smckusick  * Duplicate the current processes' credentials.  Since we are called only
36441480Smckusick  * as the result of a SET ioctl and only root can do that, any future access
36541480Smckusick  * to this "disk" is essentially as root.  Note that credentials may change
36641480Smckusick  * if some other uid can write directly to the mapped file (NFS).
36741480Smckusick  */
368*49299Shibler vnsetcred(vn, cred)
369*49299Shibler 	register struct vn_softc *vn;
370*49299Shibler 	struct ucred cred;
37141480Smckusick {
37241480Smckusick 	struct uio auio;
37341480Smckusick 	struct iovec aiov;
37441480Smckusick 	char tmpbuf[DEV_BSIZE];
37541480Smckusick 
376*49299Shibler 	vn->sc_cred = crdup(cred);
37741480Smckusick 	/* XXX: Horrible kludge to establish credentials for NFS */
37841480Smckusick 	aiov.iov_base = tmpbuf;
379*49299Shibler 	aiov.iov_len = MIN(DEV_BSIZE, dbtob(vn->sc_size));
38041480Smckusick 	auio.uio_iov = &aiov;
38141480Smckusick 	auio.uio_iovcnt = 1;
38241480Smckusick 	auio.uio_offset = 0;
38341480Smckusick 	auio.uio_rw = UIO_READ;
38441480Smckusick 	auio.uio_segflg = UIO_SYSSPACE;
38541480Smckusick 	auio.uio_resid = aiov.iov_len;
386*49299Shibler 	return(VOP_READ(vn->sc_vp, &auio, 0, vn->sc_cred));
38741480Smckusick }
38841480Smckusick 
38941480Smckusick /*
39041480Smckusick  * Set maxactive based on FS type
39141480Smckusick  */
392*49299Shibler vnthrottle(vn, vp)
393*49299Shibler 	register struct vn_softc *vn;
39441480Smckusick 	struct vnode *vp;
39541480Smckusick {
396*49299Shibler 	extern struct vnodeops ufs_vnodeops, nfsv2_vnodeops;
39741480Smckusick 
398*49299Shibler 	if (vp->v_op == &nfsv2_vnodeops)
399*49299Shibler 		vn->sc_maxactive = 2;
40041480Smckusick 	else
401*49299Shibler 		vn->sc_maxactive = 8;
40241480Smckusick 
403*49299Shibler 	if (vn->sc_maxactive < 1)
404*49299Shibler 		vn->sc_maxactive = 1;
40541480Smckusick }
40641480Smckusick 
407*49299Shibler vnshutdown()
40841480Smckusick {
409*49299Shibler 	register struct vn_softc *vn;
41041480Smckusick 
411*49299Shibler 	for (vn = &vn_softc[0]; vn < &vn_softc[NVN]; vn++)
412*49299Shibler 		if (vn->sc_flags & VNF_INITED)
413*49299Shibler 			vnclear(vn);
41441480Smckusick }
41541480Smckusick 
416*49299Shibler vnclear(vn)
417*49299Shibler 	register struct vn_softc *vn;
41841480Smckusick {
419*49299Shibler 	register struct vnode *vp = vn->sc_vp;
42041480Smckusick 
42141480Smckusick #ifdef DEBUG
422*49299Shibler 	if (vndebug & VDB_FOLLOW)
423*49299Shibler 		printf("vnclear(%x): vp %x\n", vp);
42441480Smckusick #endif
425*49299Shibler 	vn->sc_flags &= ~VNF_INITED;
42641480Smckusick 	if (vp == (struct vnode *)0)
427*49299Shibler 		panic("vnioctl: null vp");
42841480Smckusick #if 0
42941480Smckusick 	/* XXX - this doesn't work right now */
430*49299Shibler 	(void) VOP_FSYNC(vp, 0, vn->sc_cred, MNT_WAIT, p);
43141480Smckusick #endif
432*49299Shibler 	vrele(vp);
433*49299Shibler 	crfree(vn->sc_cred);
434*49299Shibler 	vn->sc_vp = (struct vnode *)0;
435*49299Shibler 	vn->sc_cred = (struct ucred *)0;
436*49299Shibler 	vn->sc_size = 0;
43741480Smckusick }
43841480Smckusick 
439*49299Shibler vnsize(dev)
44041480Smckusick 	dev_t dev;
44141480Smckusick {
442*49299Shibler 	int unit = vnunit(dev);
443*49299Shibler 	register struct vn_softc *vn = &vn_softc[unit];
44441480Smckusick 
445*49299Shibler 	if (unit >= NVN || (vn->sc_flags & VNF_INITED) == 0)
44641480Smckusick 		return(-1);
447*49299Shibler 	return(vn->sc_size);
44841480Smckusick }
44941480Smckusick 
/*
 * Dump entry point: always fails with ENXIO.
 */
int
vndump(dev)
	int dev;
{
	return(ENXIO);
}
45441480Smckusick #endif
455