xref: /csrg-svn/sys/dev/vn.c (revision 45788)
141480Smckusick /*
241480Smckusick  * Copyright (c) 1988 University of Utah.
341480Smckusick  * Copyright (c) 1990 The Regents of the University of California.
441480Smckusick  * All rights reserved.
541480Smckusick  *
641480Smckusick  * This code is derived from software contributed to Berkeley by
741480Smckusick  * the Systems Programming Group of the University of Utah Computer
841480Smckusick  * Science Department.
941480Smckusick  *
1041480Smckusick  * %sccs.include.redist.c%
1141480Smckusick  *
1245485Smckusick  * from: Utah $Hdr: fd.c 1.1 90/07/09$
1341480Smckusick  *
14*45788Sbostic  *	@(#)vn.c	7.3 (Berkeley) 12/16/90
1541480Smckusick  */
1641480Smckusick 
1741480Smckusick /*
1841480Smckusick  * File (vnode) disk driver.
1941480Smckusick  *
2041480Smckusick  * Block/character interface to a vnode.  Note that this uses the
2141480Smckusick  * VOP_BMAP/VOP_STRATEGY interface to the vnode instead of a simple
2241480Smckusick  * VOP_RDWR.  We do this to avoid distorting the local buffer cache.
2341480Smckusick  *
 * NOTE: There is a security issue involved with this driver.
 * Once mounted, all access to the contents of the "mapped" file via
 * the special file is controlled solely by the permissions on the
 * special file; the protection of the mapped file itself is ignored
 * (effectively, root credentials are used in all transactions).
2941480Smckusick  */
3041480Smckusick #include "fd.h"
3141480Smckusick #if NFD > 0
3241480Smckusick 
33*45788Sbostic #include "sys/param.h"
34*45788Sbostic #include "sys/systm.h"
35*45788Sbostic #include "sys/buf.h"
36*45788Sbostic #include "sys/errno.h"
37*45788Sbostic #include "sys/dkstat.h"
38*45788Sbostic #include "sys/ioctl.h"
39*45788Sbostic #include "sys/user.h"
40*45788Sbostic #include "sys/vfs.h"
41*45788Sbostic #include "sys/vnode.h"
42*45788Sbostic #include "sys/file.h"
43*45788Sbostic #include "sys/uio.h"
44*45788Sbostic #include "sys/malloc.h"
4541480Smckusick 
4641480Smckusick #include "fdioctl.h"
4741480Smckusick 
#ifdef DEBUG
/*
 * Debug trace mask; each bit enables one class of printf in this file.
 */
int fddebug = 0x00;
#define FDB_FOLLOW	0x01	/* trace entry to the driver routines */
#define FDB_INIT	0x02	/* trace FDIOCSET/FDIOCCLR in fdioctl() */
#define FDB_IO		0x04	/* trace each sub-request in strategy/start/iodone */
#endif
5441480Smckusick 
/* Per-unit buffer header handed to physio() by fdread()/fdwrite(). */
struct	buf fdbuf[NFD];
/*
 * Per-unit queue head: fdstrategy() sorts sub-requests onto it with
 * disksort() and b_active counts the requests currently started.
 */
struct	buf fdtab[NFD];

/* fdstrategy() stores the sort key (the block number) in b_resid. */
#define b_cylin	b_resid

/* Unit number is taken from bits 3-5 of the minor device number. */
#define	fdunit(x)	((minor(x) >> 3) & 0x7)	/* for consistency */

/*
 * Allocate/release the struct buf used for one sub-request.
 * The allocation is made with M_WAITOK.
 */
#define	getfdbuf()	\
	((struct buf *)malloc(sizeof(struct buf), M_DEVBUF, M_WAITOK))
#define putfdbuf(bp)	\
	free((caddr_t)(bp), M_DEVBUF)
6641480Smckusick 
/*
 * Per-unit software state, filled in by the FDIOCSET ioctl and reset
 * by fdclear().
 */
struct fd_softc {
	int		 sc_flags;	/* flags */
	size_t		 sc_size;	/* size of fd, in DEV_BSIZE blocks */
	struct vnode	*sc_vp;		/* vnode of the mapped file */
	struct ucred	*sc_cred;	/* credentials duplicated at FDIOCSET */
	int		 sc_maxactive;	/* max # of active requests */
} fd_softc[NFD];

/* sc_flags */
#define	FDF_ALIVE	0x01	/* not referenced in the visible code */
#define FDF_INITED	0x02	/* a file is attached; set by FDIOCSET */
7841480Smckusick 
7941480Smckusick fdopen(dev, flags)
8041480Smckusick 	dev_t dev;
8141480Smckusick {
8241480Smckusick 	int unit = fdunit(dev);
8341480Smckusick 
8441480Smckusick #ifdef DEBUG
8541480Smckusick 	if (fddebug & FDB_FOLLOW)
8641480Smckusick 		printf("fdopen(%x, %x)\n", dev, flags);
8741480Smckusick #endif
8841480Smckusick 	if (unit >= NFD)
8941480Smckusick 		return(ENXIO);
9041480Smckusick 	return(0);
9141480Smckusick }
9241480Smckusick 
9341480Smckusick /*
9441480Smckusick  * Break the request into bsize pieces and submit using VOP_BMAP/VOP_STRATEGY.
9541480Smckusick  * Note that this driver can only be used for swapping over NFS on the hp
9641480Smckusick  * since nfs_strategy on the vax cannot handle u-areas and page tables.
9741480Smckusick  */
9841480Smckusick fdstrategy(bp)
9941480Smckusick 	register struct buf *bp;
10041480Smckusick {
10141480Smckusick 	int unit = fdunit(bp->b_dev);
10241480Smckusick 	register struct fd_softc *fs = &fd_softc[unit];
10341480Smckusick 	register struct buf *nbp;
10441480Smckusick 	register int bn, bsize, resid;
10541480Smckusick 	register caddr_t addr;
10641480Smckusick 	int sz, flags;
10741480Smckusick 	extern int fdiodone();
10841480Smckusick 
10941480Smckusick #ifdef DEBUG
11041480Smckusick 	if (fddebug & FDB_FOLLOW)
11141480Smckusick 		printf("fdstrategy(%x): unit %d\n", bp, unit);
11241480Smckusick #endif
11341480Smckusick 	if ((fs->sc_flags & FDF_INITED) == 0) {
11441480Smckusick 		bp->b_error = ENXIO;
11541480Smckusick 		bp->b_flags |= B_ERROR;
11641480Smckusick 		iodone(bp);
11741480Smckusick 		return;
11841480Smckusick 	}
11941480Smckusick 	bn = bp->b_blkno;
12041480Smckusick 	sz = howmany(bp->b_bcount, DEV_BSIZE);
12141480Smckusick 	bp->b_resid = bp->b_bcount;
12241480Smckusick 	if (bn < 0 || bn + sz > fs->sc_size) {
12341480Smckusick 		if (bn != fs->sc_size) {
12441480Smckusick 			bp->b_error = EINVAL;
12541480Smckusick 			bp->b_flags |= B_ERROR;
12641480Smckusick 		}
12741480Smckusick 		iodone(bp);
12841480Smckusick 		return;
12941480Smckusick 	}
13041480Smckusick 	bn = dbtob(bn);
13141480Smckusick 	bsize = fs->sc_vp->v_vfsp->vfs_bsize;
13241480Smckusick 	addr = bp->b_un.b_addr;
13341480Smckusick 	flags = bp->b_flags | B_CALL;
13441480Smckusick 	for (resid = bp->b_resid; resid; resid -= sz) {
13541480Smckusick 		struct vnode *vp;
13641480Smckusick 		daddr_t nbn;
13741480Smckusick 		int off, s;
13841480Smckusick 
13941480Smckusick 		nbp = getfdbuf();
14041480Smckusick 		off = bn % bsize;
14141480Smckusick 		sz = MIN(bsize - off, resid);
14241480Smckusick 		(void) VOP_BMAP(fs->sc_vp, bn / bsize, &vp, &nbn);
14341480Smckusick #ifdef DEBUG
14441480Smckusick 		if (fddebug & FDB_IO)
14541480Smckusick 			printf("fdstrategy: vp %x/%x bn %x/%x dev %x\n",
14641480Smckusick 			       fs->sc_vp, vp, bn, nbn, vp->v_rdev);
14741480Smckusick #endif
14841480Smckusick 		nbp->b_flags = flags;
14941480Smckusick 		nbp->b_bcount = sz;
15041480Smckusick 		nbp->b_bufsize = bp->b_bufsize;
15141480Smckusick 		nbp->b_error = 0;
15241480Smckusick 		nbp->b_dev = vp->v_rdev;
15341480Smckusick 		nbp->b_un.b_addr = addr;
15441480Smckusick 		nbp->b_blkno = nbn + btodb(off);
15541480Smckusick 		nbp->b_proc = bp->b_proc;
15641480Smckusick 		nbp->b_iodone = fdiodone;
15741480Smckusick 		nbp->b_vp = vp;
15841480Smckusick 		nbp->b_pfcent = (int) bp;	/* XXX */
15941480Smckusick 		/*
16041480Smckusick 		 * Just sort by block number
16141480Smckusick 		 */
16241480Smckusick 		nbp->b_cylin = nbp->b_blkno;
16341480Smckusick 		s = splbio();
16441480Smckusick 		disksort(&fdtab[unit], nbp);
16541480Smckusick 		if (fdtab[unit].b_active < fs->sc_maxactive) {
16641480Smckusick 			fdtab[unit].b_active++;
16741480Smckusick 			fdstart(unit);
16841480Smckusick 		}
16941480Smckusick 		splx(s);
17041480Smckusick 		bn += sz;
17141480Smckusick 		addr += sz;
17241480Smckusick 	}
17341480Smckusick }
17441480Smckusick 
17541480Smckusick /*
17641480Smckusick  * Feed requests sequentially.
17741480Smckusick  * We do it this way to keep from flooding NFS servers if we are connected
17841480Smckusick  * to an NFS file.  This places the burden on the client rather than the
17941480Smckusick  * server.
18041480Smckusick  */
18141480Smckusick fdstart(unit)
18241480Smckusick {
18341480Smckusick 	register struct fd_softc *fs = &fd_softc[unit];
18441480Smckusick 	register struct buf *bp;
18541480Smckusick 
18641480Smckusick 	/*
18741480Smckusick 	 * Dequeue now since lower level strategy routine might
18841480Smckusick 	 * queue using same links
18941480Smckusick 	 */
19041480Smckusick 	bp = fdtab[unit].b_actf;
19141480Smckusick 	fdtab[unit].b_actf = bp->b_actf;
19241480Smckusick #ifdef DEBUG
19341480Smckusick 	if (fddebug & FDB_IO)
19441480Smckusick 		printf("fdstart(%d): bp %x vp %x blkno %x addr %x cnt %x\n",
19541480Smckusick 		       unit, bp, bp->b_vp, bp->b_blkno, bp->b_un.b_addr,
19641480Smckusick 		       bp->b_bcount);
19741480Smckusick #endif
19841480Smckusick 	VOP_STRATEGY(bp);
19941480Smckusick }
20041480Smckusick 
20141480Smckusick fdiodone(bp)
20241480Smckusick 	register struct buf *bp;
20341480Smckusick {
20441480Smckusick 	register struct buf *pbp = (struct buf *)bp->b_pfcent;	/* XXX */
20541480Smckusick 	register int unit = fdunit(pbp->b_dev);
20641480Smckusick 	int s;
20741480Smckusick 
20841480Smckusick 	s = splbio();
20941480Smckusick #ifdef DEBUG
21041480Smckusick 	if (fddebug & FDB_IO)
21141480Smckusick 		printf("fdiodone(%d): bp %x vp %x blkno %x addr %x cnt %x\n",
21241480Smckusick 		       unit, bp, bp->b_vp, bp->b_blkno, bp->b_un.b_addr,
21341480Smckusick 		       bp->b_bcount);
21441480Smckusick #endif
21541480Smckusick 	if (bp->b_error) {
21641480Smckusick #ifdef DEBUG
21741480Smckusick 		if (fddebug & FDB_IO)
21841480Smckusick 			printf("fdiodone: bp %x error %d\n", bp, bp->b_error);
21941480Smckusick #endif
22041480Smckusick 		pbp->b_flags |= B_ERROR;
22141480Smckusick 		pbp->b_error = geterror(bp);
22241480Smckusick 	}
22341480Smckusick 	pbp->b_resid -= bp->b_bcount;
22441480Smckusick 	putfdbuf(bp);
22541480Smckusick 	if (pbp->b_resid == 0) {
22641480Smckusick #ifdef DEBUG
22741480Smckusick 		if (fddebug & FDB_IO)
22841480Smckusick 			printf("fdiodone: pbp %x iodone\n", pbp);
22941480Smckusick #endif
23041480Smckusick 		iodone(pbp);
23141480Smckusick 	}
23241480Smckusick 	if (fdtab[unit].b_actf)
23341480Smckusick 		fdstart(unit);
23441480Smckusick 	else
23541480Smckusick 		fdtab[unit].b_active--;
23641480Smckusick 	splx(s);
23741480Smckusick }
23841480Smckusick 
23941480Smckusick fdread(dev, uio)
24041480Smckusick 	dev_t dev;
24141480Smckusick 	struct uio *uio;
24241480Smckusick {
24341480Smckusick 	register int unit = fdunit(dev);
24441480Smckusick 
24541480Smckusick #ifdef DEBUG
24641480Smckusick 	if (fddebug & FDB_FOLLOW)
24741480Smckusick 		printf("fdread(%x, %x)\n", dev, uio);
24841480Smckusick #endif
24941480Smckusick 	return(physio(fdstrategy, &fdbuf[unit], dev, B_READ, minphys, uio));
25041480Smckusick }
25141480Smckusick 
25241480Smckusick fdwrite(dev, uio)
25341480Smckusick 	dev_t dev;
25441480Smckusick 	struct uio *uio;
25541480Smckusick {
25641480Smckusick 	register int unit = fdunit(dev);
25741480Smckusick 
25841480Smckusick #ifdef DEBUG
25941480Smckusick 	if (fddebug & FDB_FOLLOW)
26041480Smckusick 		printf("fdwrite(%x, %x)\n", dev, uio);
26141480Smckusick #endif
26241480Smckusick 	return(physio(fdstrategy, &fdbuf[unit], dev, B_WRITE, minphys, uio));
26341480Smckusick }
26441480Smckusick 
26541480Smckusick /* ARGSUSED */
26641480Smckusick fdioctl(dev, cmd, data, flag)
26741480Smckusick 	dev_t dev;
26841480Smckusick 	u_long cmd;
26941480Smckusick 	caddr_t data;
27041480Smckusick 	int flag;
27141480Smckusick {
27241480Smckusick 	int unit = fdunit(dev);
27341480Smckusick 	register struct fd_softc *fs;
27441480Smckusick 	struct fd_ioctl *fio;
27541480Smckusick 	struct vattr vattr;
27641480Smckusick 	struct vnode *vp;
27741480Smckusick 	int error;
27841480Smckusick 
27941480Smckusick #ifdef DEBUG
28041480Smckusick 	if (fddebug & FDB_FOLLOW)
28141480Smckusick 		printf("fdioctl(%x, %x, %x, %x): unit %d\n",
28241480Smckusick 		       dev, cmd, data, flag, unit);
28341480Smckusick #endif
28441480Smckusick 	error = suser(u.u_cred, &u.u_acflag);
28541480Smckusick 	if (error)
28641480Smckusick 		return (error);
28741480Smckusick 	if (unit >= NFD)
28841480Smckusick 		return (ENXIO);
28941480Smckusick 
29041480Smckusick 	fs = &fd_softc[unit];
29141480Smckusick 	fio = (struct fd_ioctl *)data;
29241480Smckusick 	switch (cmd) {
29341480Smckusick 
29441480Smckusick 	case FDIOCSET:
29541480Smckusick 		if (fs->sc_flags & FDF_INITED)
29641480Smckusick 			return(EBUSY);
29741480Smckusick 		/*
29841480Smckusick 		 * Always open for read and write.
29941480Smckusick 		 * This is probably bogus, but it lets vn_open()
30041480Smckusick 		 * weed out directories, sockets, etc. so we don't
30141480Smckusick 		 * have to worry about them.
30241480Smckusick 		 */
30341480Smckusick 		error = vn_open(fio->fd_file, UIO_USERSPACE,
30441480Smckusick 				FREAD|FWRITE, 0, &vp);
30541480Smckusick 		if (error)
30641480Smckusick 			return(error);
30741480Smckusick 		error = VOP_GETATTR(vp, &vattr, u.u_cred);
30841480Smckusick 		if (error) {
30941480Smckusick 			vn_close(vp, FREAD|FWRITE);
31041480Smckusick 			VN_RELE(vp);
31141480Smckusick 			return(error);
31241480Smckusick 		}
31341480Smckusick 		fs->sc_vp = vp;
31441480Smckusick 		fs->sc_size = btodb(vattr.va_size);	/* note truncation */
31541480Smckusick 		error = fdsetcred(fs);
31641480Smckusick 		if (error) {
31741480Smckusick 			vn_close(vp, FREAD|FWRITE);
31841480Smckusick 			VN_RELE(vp);
31941480Smckusick 			return(error);
32041480Smckusick 		}
32141480Smckusick 		fdthrottle(fs, vp);
32241480Smckusick 		fio->fd_size = dbtob(fs->sc_size);
32341480Smckusick 		fs->sc_flags |= FDF_INITED;
32441480Smckusick #ifdef DEBUG
32541480Smckusick 		if (fddebug & FDB_INIT)
32641480Smckusick 			printf("fdioctl: SET vp %x size %x\n",
32741480Smckusick 			       fs->sc_vp, fs->sc_size);
32841480Smckusick #endif
32941480Smckusick 		break;
33041480Smckusick 
33141480Smckusick 	case FDIOCCLR:
33241480Smckusick 		if ((fs->sc_flags & FDF_INITED) == 0)
33341480Smckusick 			return(ENXIO);
33441480Smckusick 		fdclear(fs);
33541480Smckusick #ifdef DEBUG
33641480Smckusick 		if (fddebug & FDB_INIT)
33741480Smckusick 			printf("fdioctl: CLRed\n");
33841480Smckusick #endif
33941480Smckusick 		break;
34041480Smckusick 
34141480Smckusick 	default:
34241480Smckusick 		return(ENXIO);
34341480Smckusick 	}
34441480Smckusick 	return(0);
34541480Smckusick }
34641480Smckusick 
34741480Smckusick /*
34841480Smckusick  * Duplicate the current processes' credentials.  Since we are called only
34941480Smckusick  * as the result of a SET ioctl and only root can do that, any future access
35041480Smckusick  * to this "disk" is essentially as root.  Note that credentials may change
35141480Smckusick  * if some other uid can write directly to the mapped file (NFS).
35241480Smckusick  */
35341480Smckusick fdsetcred(fs)
35441480Smckusick 	register struct fd_softc *fs;
35541480Smckusick {
35641480Smckusick 	struct uio auio;
35741480Smckusick 	struct iovec aiov;
35841480Smckusick 	char tmpbuf[DEV_BSIZE];
35941480Smckusick 
36041480Smckusick 	fs->sc_cred = crdup(u.u_cred);
36141480Smckusick 	/* XXX: Horrible kludge to establish credentials for NFS */
36241480Smckusick 	aiov.iov_base = tmpbuf;
36341480Smckusick 	aiov.iov_len = MIN(DEV_BSIZE, dbtob(fs->sc_size));
36441480Smckusick 	auio.uio_iov = &aiov;
36541480Smckusick 	auio.uio_iovcnt = 1;
36641480Smckusick 	auio.uio_offset = 0;
36741480Smckusick 	auio.uio_rw = UIO_READ;
36841480Smckusick 	auio.uio_segflg = UIO_SYSSPACE;
36941480Smckusick 	auio.uio_resid = aiov.iov_len;
37041480Smckusick 	return(VOP_READ(fs->sc_vp, &auio, 0, fs->sc_cred));
37141480Smckusick }
37241480Smckusick 
37341480Smckusick /*
37441480Smckusick  * Set maxactive based on FS type
37541480Smckusick  */
37641480Smckusick fdthrottle(fs, vp)
37741480Smckusick 	register struct fd_softc *fs;
37841480Smckusick 	struct vnode *vp;
37941480Smckusick {
38041480Smckusick 	extern struct vnodeops ufs_vnodeops, nfs_vnodeops;
38141480Smckusick 
38241480Smckusick 	if (vp->v_op == &nfs_vnodeops)
38341480Smckusick 		fs->sc_maxactive = 2;
38441480Smckusick 	else
38541480Smckusick 		fs->sc_maxactive = 8;
38641480Smckusick 
38741480Smckusick 	if (fs->sc_maxactive < 1)
38841480Smckusick 		fs->sc_maxactive = 1;
38941480Smckusick }
39041480Smckusick 
39141480Smckusick fdshutdown()
39241480Smckusick {
39341480Smckusick 	register struct fd_softc *fs;
39441480Smckusick 
39541480Smckusick 	for (fs = &fd_softc[0]; fs < &fd_softc[NFD]; fs++)
39641480Smckusick 		if (fs->sc_flags & FDF_INITED)
39741480Smckusick 			fdclear(fs);
39841480Smckusick }
39941480Smckusick 
40041480Smckusick fdclear(fs)
40141480Smckusick 	register struct fd_softc *fs;
40241480Smckusick {
40341480Smckusick 	register struct vnode *vp = fs->sc_vp;
40441480Smckusick 
40541480Smckusick #ifdef DEBUG
40641480Smckusick 	if (fddebug & FDB_FOLLOW)
40741480Smckusick 		printf("fdclear(%x): vp %x\n", vp);
40841480Smckusick #endif
40941480Smckusick 	fs->sc_flags &= ~FDF_INITED;
41041480Smckusick 	if (vp == (struct vnode *)0)
41141480Smckusick 		panic("fdioctl: null vp");
41241480Smckusick #if 0
41341480Smckusick 	/* XXX - this doesn't work right now */
41441480Smckusick 	(void) VOP_FSYNC(vp, fs->sc_cred);
41541480Smckusick #endif
41641480Smckusick 	vn_close(vp, FREAD|FWRITE);
41741480Smckusick 	VN_RELE(vp);
41841480Smckusick 	crfree(fs->sc_cred);
41941480Smckusick 	fs->sc_vp = (struct vnode *)0;
42041480Smckusick 	fs->sc_cred = (struct ucred *)0;
42141480Smckusick 	fs->sc_size = 0;
42241480Smckusick }
42341480Smckusick 
42441480Smckusick fdsize(dev)
42541480Smckusick 	dev_t dev;
42641480Smckusick {
42741480Smckusick 	int unit = fdunit(dev);
42841480Smckusick 	register struct fd_softc *fs = &fd_softc[unit];
42941480Smckusick 
43041480Smckusick 	if (unit >= NFD || (fs->sc_flags & FDF_INITED) == 0)
43141480Smckusick 		return(-1);
43241480Smckusick 	return(fs->sc_size);
43341480Smckusick }
43441480Smckusick 
43541480Smckusick fddump(dev)
43641480Smckusick {
43741480Smckusick 	return(ENXIO);
43841480Smckusick }
43941480Smckusick #endif
440