xref: /csrg-svn/sys/vm/vnode_pager.c (revision 48362)
145749Smckusick /*
245749Smckusick  * Copyright (c) 1990 University of Utah.
345749Smckusick  * Copyright (c) 1991 The Regents of the University of California.
445749Smckusick  * All rights reserved.
545749Smckusick  *
645749Smckusick  * This code is derived from software contributed to Berkeley by
745749Smckusick  * the Systems Programming Group of the University of Utah Computer
845749Smckusick  * Science Department.
945749Smckusick  *
1045749Smckusick  * %sccs.include.redist.c%
1145749Smckusick  *
12*48362Smckusick  *	@(#)vnode_pager.c	7.4 (Berkeley) 04/19/91
1345749Smckusick  */
1445749Smckusick 
1545749Smckusick /*
1645749Smckusick  * Page to/from files (vnodes).
1745749Smckusick  *
1845749Smckusick  * TODO:
1945749Smckusick  *	pageouts
2047977Skarels  *	fix credential use (uses current process credentials now)
2145749Smckusick  */
2245749Smckusick #include "vnodepager.h"
2345749Smckusick #if NVNODEPAGER > 0
2445749Smckusick 
2545749Smckusick #include "param.h"
2647977Skarels #include "proc.h"
2745749Smckusick #include "malloc.h"
2845749Smckusick #include "vnode.h"
2945749Smckusick #include "uio.h"
3045749Smckusick #include "mount.h"
3147977Skarels 
3247977Skarels #include "vm_param.h"
3347977Skarels #include "lock.h"
3445749Smckusick #include "queue.h"
3547977Skarels #include "vm_prot.h"
3647977Skarels #include "vm_object.h"
3747977Skarels #include "vm_page.h"
3847977Skarels #include "vnode_pager.h"
3945749Smckusick 
4045749Smckusick queue_head_t	vnode_pager_list;	/* list of managed vnodes */
4145749Smckusick 
4245749Smckusick #ifdef DEBUG
4345749Smckusick int	vpagerdebug = 0x00;
4445749Smckusick #define	VDB_FOLLOW	0x01
4545749Smckusick #define VDB_INIT	0x02
4645749Smckusick #define VDB_IO		0x04
4745749Smckusick #define VDB_FAIL	0x08
4845749Smckusick #define VDB_ALLOC	0x10
4945749Smckusick #define VDB_SIZE	0x20
5045749Smckusick #endif
5145749Smckusick 
5245749Smckusick void
5345749Smckusick vnode_pager_init()
5445749Smckusick {
5545749Smckusick #ifdef DEBUG
5645749Smckusick 	if (vpagerdebug & VDB_FOLLOW)
5745749Smckusick 		printf("vnode_pager_init()\n");
5845749Smckusick #endif
5945749Smckusick 	queue_init(&vnode_pager_list);
6045749Smckusick }
6145749Smckusick 
6245749Smckusick /*
6345749Smckusick  * Allocate (or lookup) pager for a vnode.
6445749Smckusick  * Handle is a vnode pointer.
6545749Smckusick  */
6645749Smckusick vm_pager_t
6745749Smckusick vnode_pager_alloc(handle, size, prot)
6845749Smckusick 	caddr_t handle;
6945749Smckusick 	vm_size_t size;
7045749Smckusick 	vm_prot_t prot;
7145749Smckusick {
7245749Smckusick 	register vm_pager_t pager;
7345749Smckusick 	register vn_pager_t vnp;
7445749Smckusick 	vm_object_t object;
7545749Smckusick 	struct vattr vattr;
7645749Smckusick 	struct vnode *vp;
7748042Smckusick 	struct proc *p = curproc;	/* XXX */
7845749Smckusick 
7945749Smckusick #ifdef DEBUG
8045749Smckusick 	if (vpagerdebug & (VDB_FOLLOW|VDB_ALLOC))
8145749Smckusick 		printf("vnode_pager_alloc(%x, %x, %x)\n", handle, size, prot);
8245749Smckusick #endif
8345749Smckusick 	/*
8445749Smckusick 	 * Pageout to vnode, no can do yet.
8545749Smckusick 	 */
8645749Smckusick 	if (handle == NULL)
8745749Smckusick 		return(VM_PAGER_NULL);
8845749Smckusick 
8945749Smckusick 	/*
9045749Smckusick 	 * Vnodes keep a pointer to any associated pager so no need to
9145749Smckusick 	 * lookup with vm_pager_lookup.
9245749Smckusick 	 */
9345749Smckusick 	vp = (struct vnode *)handle;
9445749Smckusick 	pager = (vm_pager_t)vp->v_vmdata;
9545749Smckusick 	if (pager == VM_PAGER_NULL) {
9645749Smckusick 		/*
9745749Smckusick 		 * Allocate pager structures
9845749Smckusick 		 */
9945749Smckusick 		pager = (vm_pager_t)malloc(sizeof *pager, M_VMPAGER, M_WAITOK);
10045749Smckusick 		if (pager == VM_PAGER_NULL)
10145749Smckusick 			return(VM_PAGER_NULL);
10245749Smckusick 		vnp = (vn_pager_t)malloc(sizeof *vnp, M_VMPGDATA, M_WAITOK);
10345749Smckusick 		if (vnp == VN_PAGER_NULL) {
10445749Smckusick 			free((caddr_t)pager, M_VMPAGER);
10545749Smckusick 			return(VM_PAGER_NULL);
10645749Smckusick 		}
10745749Smckusick 		/*
10845749Smckusick 		 * And an object of the appropriate size
10945749Smckusick 		 */
11048042Smckusick 		if (VOP_GETATTR(vp, &vattr, p->p_ucred, p) == 0) {
11145749Smckusick 			object = vm_object_allocate(round_page(vattr.va_size));
11245749Smckusick 			vm_object_enter(object, pager);
11345749Smckusick 			vm_object_setpager(object, pager, 0, TRUE);
11445749Smckusick 		} else {
11545749Smckusick 			free((caddr_t)vnp, M_VMPGDATA);
11645749Smckusick 			free((caddr_t)pager, M_VMPAGER);
11745749Smckusick 			return(VM_PAGER_NULL);
11845749Smckusick 		}
11945749Smckusick 		/*
12045749Smckusick 		 * Hold a reference to the vnode and initialize pager data.
12145749Smckusick 		 */
12245749Smckusick 		VREF(vp);
12345749Smckusick 		vnp->vnp_flags = 0;
12445749Smckusick 		vnp->vnp_vp = vp;
12545749Smckusick 		vnp->vnp_size = vattr.va_size;
12645749Smckusick 		queue_enter(&vnode_pager_list, pager, vm_pager_t, pg_list);
12745749Smckusick 		pager->pg_handle = handle;
12845749Smckusick 		pager->pg_type = PG_VNODE;
12945749Smckusick 		pager->pg_ops = &vnodepagerops;
13045749Smckusick 		pager->pg_data = (caddr_t)vnp;
13145749Smckusick 		vp->v_vmdata = (caddr_t)pager;
13245749Smckusick 	} else {
13345749Smckusick 		/*
13445749Smckusick 		 * vm_object_lookup() will remove the object from the
13545749Smckusick 		 * cache if found and also gain a reference to the object.
13645749Smckusick 		 */
13745749Smckusick 		object = vm_object_lookup(pager);
13847977Skarels #ifdef DEBUG
13945749Smckusick 		vnp = (vn_pager_t)pager->pg_data;
14047977Skarels #endif
14145749Smckusick 	}
14245749Smckusick #ifdef DEBUG
14345749Smckusick 	if (vpagerdebug & VDB_ALLOC)
14445749Smckusick 		printf("vnode_pager_setup: vp %x sz %x pager %x object %x\n",
14545749Smckusick 		       vp, vnp->vnp_size, pager, object);
14645749Smckusick #endif
14745749Smckusick 	return(pager);
14845749Smckusick }
14945749Smckusick 
15045749Smckusick void
15145749Smckusick vnode_pager_dealloc(pager)
15245749Smckusick 	vm_pager_t pager;
15345749Smckusick {
15445749Smckusick 	register vn_pager_t vnp = (vn_pager_t)pager->pg_data;
15545749Smckusick 	register struct vnode *vp;
15648042Smckusick 	struct proc *p = curproc;		/* XXX */
15745749Smckusick 
15845749Smckusick #ifdef DEBUG
15945749Smckusick 	if (vpagerdebug & VDB_FOLLOW)
16045749Smckusick 		printf("vnode_pager_dealloc(%x)\n", pager);
16145749Smckusick #endif
16245749Smckusick 	if (vp = vnp->vnp_vp) {
16345749Smckusick 		vp->v_vmdata = NULL;
16445749Smckusick 		vp->v_flag &= ~VTEXT;
16545749Smckusick #if 0
16645749Smckusick 		/* can hang if done at reboot on NFS FS */
16748042Smckusick 		(void) VOP_FSYNC(vp, p->p_ucred, p);
16845749Smckusick #endif
16945749Smckusick 		vrele(vp);
17045749Smckusick 	}
17145749Smckusick 	queue_remove(&vnode_pager_list, pager, vm_pager_t, pg_list);
17245749Smckusick 	free((caddr_t)vnp, M_VMPGDATA);
17345749Smckusick 	free((caddr_t)pager, M_VMPAGER);
17445749Smckusick }
17545749Smckusick 
17645749Smckusick vnode_pager_getpage(pager, m, sync)
17745749Smckusick 	vm_pager_t pager;
17845749Smckusick 	vm_page_t m;
17945749Smckusick 	boolean_t sync;
18045749Smckusick {
18145749Smckusick 
18245749Smckusick #ifdef DEBUG
18345749Smckusick 	if (vpagerdebug & VDB_FOLLOW)
18445749Smckusick 		printf("vnode_pager_getpage(%x, %x)\n", pager, m);
18545749Smckusick #endif
18645749Smckusick 	return(vnode_pager_io((vn_pager_t)pager->pg_data, m, UIO_READ));
18745749Smckusick }
18845749Smckusick 
18945749Smckusick boolean_t
19045749Smckusick vnode_pager_putpage(pager, m, sync)
19145749Smckusick 	vm_pager_t pager;
19245749Smckusick 	vm_page_t m;
19345749Smckusick 	boolean_t sync;
19445749Smckusick {
19545749Smckusick 	int err;
19645749Smckusick 
19745749Smckusick #ifdef DEBUG
19845749Smckusick 	if (vpagerdebug & VDB_FOLLOW)
19945749Smckusick 		printf("vnode_pager_putpage(%x, %x)\n", pager, m);
20045749Smckusick #endif
20145749Smckusick 	if (pager == VM_PAGER_NULL)
20245749Smckusick 		return;
20345749Smckusick 	err = vnode_pager_io((vn_pager_t)pager->pg_data, m, UIO_WRITE);
20445749Smckusick 	if (err == VM_PAGER_OK) {
20545749Smckusick 		m->clean = TRUE;			/* XXX - wrong place */
20645749Smckusick 		pmap_clear_modify(VM_PAGE_TO_PHYS(m));	/* XXX - wrong place */
20745749Smckusick 	}
20845749Smckusick 	return(err);
20945749Smckusick }
21045749Smckusick 
21145749Smckusick boolean_t
21245749Smckusick vnode_pager_haspage(pager, offset)
21345749Smckusick 	vm_pager_t pager;
21445749Smckusick 	vm_offset_t offset;
21545749Smckusick {
21645749Smckusick 	register vn_pager_t vnp = (vn_pager_t)pager->pg_data;
21745749Smckusick 	daddr_t bn;
21845749Smckusick 	int err;
21945749Smckusick 
22045749Smckusick #ifdef DEBUG
22145749Smckusick 	if (vpagerdebug & VDB_FOLLOW)
22245749Smckusick 		printf("vnode_pager_haspage(%x, %x)\n", pager, offset);
22345749Smckusick #endif
22445749Smckusick 
22545749Smckusick 	/*
22645749Smckusick 	 * Offset beyond end of file, do not have the page
22745749Smckusick 	 */
22845749Smckusick 	if (offset >= vnp->vnp_size) {
22945749Smckusick #ifdef DEBUG
23045749Smckusick 		if (vpagerdebug & (VDB_FAIL|VDB_SIZE))
23145749Smckusick 			printf("vnode_pager_haspage: pg %x, off %x, size %x\n",
23245749Smckusick 			       pager, offset, vnp->vnp_size);
23345749Smckusick #endif
23445749Smckusick 		return(FALSE);
23545749Smckusick 	}
23645749Smckusick 
23745749Smckusick 	/*
23845749Smckusick 	 * Read the index to find the disk block to read
23945749Smckusick 	 * from.  If there is no block, report that we don't
24045749Smckusick 	 * have this data.
24145749Smckusick 	 *
24245749Smckusick 	 * Assumes that the vnode has whole page or nothing.
24345749Smckusick 	 */
24445749Smckusick 	err = VOP_BMAP(vnp->vnp_vp,
24545749Smckusick 		       offset / vnp->vnp_vp->v_mount->mnt_stat.f_bsize,
246*48362Smckusick 		       (struct vnode **)0, &bn);
24745749Smckusick 	if (err) {
24845749Smckusick #ifdef DEBUG
24945749Smckusick 		if (vpagerdebug & VDB_FAIL)
25045749Smckusick 			printf("vnode_pager_haspage: BMAP err %d, pg %x, off %x\n",
25145749Smckusick 			       err, pager, offset);
25245749Smckusick #endif
25345749Smckusick 		return(TRUE);
25445749Smckusick 	}
25545749Smckusick 	return((long)bn < 0 ? FALSE : TRUE);
25645749Smckusick }
25745749Smckusick 
25845749Smckusick /*
25945749Smckusick  * (XXX)
26045749Smckusick  * Lets the VM system know about a change in size for a file.
26145749Smckusick  * If this vnode is mapped into some address space (i.e. we have a pager
26245749Smckusick  * for it) we adjust our own internal size and flush any cached pages in
26345749Smckusick  * the associated object that are affected by the size change.
26445749Smckusick  *
26545749Smckusick  * Note: this routine may be invoked as a result of a pager put
26645749Smckusick  * operation (possibly at object termination time), so we must be careful.
26745749Smckusick  */
26845749Smckusick vnode_pager_setsize(vp, nsize)
26945749Smckusick 	struct vnode *vp;
27045749Smckusick 	u_long nsize;
27145749Smckusick {
27245749Smckusick 	register vn_pager_t vnp;
27345749Smckusick 	register vm_object_t object;
27445749Smckusick 	vm_pager_t pager;
27545749Smckusick 
27645749Smckusick 	/*
27745749Smckusick 	 * Not a mapped vnode
27845749Smckusick 	 */
27945749Smckusick 	if (vp == NULL || vp->v_type != VREG || vp->v_vmdata == NULL)
28045749Smckusick 		return;
28145749Smckusick 	/*
28245749Smckusick 	 * Hasn't changed size
28345749Smckusick 	 */
28445749Smckusick 	pager = (vm_pager_t)vp->v_vmdata;
28545749Smckusick 	vnp = (vn_pager_t)pager->pg_data;
28645749Smckusick 	if (nsize == vnp->vnp_size)
28745749Smckusick 		return;
28845749Smckusick 	/*
28945749Smckusick 	 * No object.
29045749Smckusick 	 * This can happen during object termination since
29145749Smckusick 	 * vm_object_page_clean is called after the object
29245749Smckusick 	 * has been removed from the hash table, and clean
29345749Smckusick 	 * may cause vnode write operations which can wind
29445749Smckusick 	 * up back here.
29545749Smckusick 	 */
29645749Smckusick 	object = vm_object_lookup(pager);
29745749Smckusick 	if (object == VM_OBJECT_NULL)
29845749Smckusick 		return;
29945749Smckusick 
30045749Smckusick #ifdef DEBUG
30145749Smckusick 	if (vpagerdebug & (VDB_FOLLOW|VDB_SIZE))
30245749Smckusick 		printf("vnode_pager_setsize: vp %x obj %x osz %d nsz %d\n",
30345749Smckusick 		       vp, object, vnp->vnp_size, nsize);
30445749Smckusick #endif
30545749Smckusick 	/*
30645749Smckusick 	 * File has shrunk.
30745749Smckusick 	 * Toss any cached pages beyond the new EOF.
30845749Smckusick 	 */
30945749Smckusick 	if (nsize < vnp->vnp_size) {
31045749Smckusick 		vm_object_lock(object);
31145749Smckusick 		vm_object_page_remove(object,
31245749Smckusick 				      (vm_offset_t)nsize, vnp->vnp_size);
31345749Smckusick 		vm_object_unlock(object);
31445749Smckusick 	}
31545749Smckusick 	vnp->vnp_size = (vm_offset_t)nsize;
31645749Smckusick 	vm_object_deallocate(object);
31745749Smckusick }
31845749Smckusick 
31945749Smckusick vnode_pager_umount(mp)
32045749Smckusick 	register struct mount *mp;
32145749Smckusick {
32245749Smckusick 	register vm_pager_t pager, npager;
32345749Smckusick 	struct vnode *vp;
32445749Smckusick 
32545749Smckusick 	pager = (vm_pager_t) queue_first(&vnode_pager_list);
32645749Smckusick 	while (!queue_end(&vnode_pager_list, (queue_entry_t)pager)) {
32745749Smckusick 		/*
32845749Smckusick 		 * Save the next pointer now since uncaching may
32945749Smckusick 		 * terminate the object and render pager invalid
33045749Smckusick 		 */
33145749Smckusick 		vp = ((vn_pager_t)pager->pg_data)->vnp_vp;
33245749Smckusick 		npager = (vm_pager_t) queue_next(&pager->pg_list);
33345749Smckusick 		if (mp == (struct mount *)0 || vp->v_mount == mp)
33445749Smckusick 			(void) vnode_pager_uncache(vp);
33545749Smckusick 		pager = npager;
33645749Smckusick 	}
33745749Smckusick }
33845749Smckusick 
33945749Smckusick /*
34045749Smckusick  * Remove vnode associated object from the object cache.
34145749Smckusick  *
34245749Smckusick  * Note: this routine may be invoked as a result of a pager put
34345749Smckusick  * operation (possibly at object termination time), so we must be careful.
34445749Smckusick  */
34545749Smckusick boolean_t
34645749Smckusick vnode_pager_uncache(vp)
34745749Smckusick 	register struct vnode *vp;
34845749Smckusick {
34945749Smckusick 	register vm_object_t object;
35045749Smckusick 	boolean_t uncached, locked;
35145749Smckusick 	vm_pager_t pager;
35245749Smckusick 
35345749Smckusick 	/*
35445749Smckusick 	 * Not a mapped vnode
35545749Smckusick 	 */
35645749Smckusick 	pager = (vm_pager_t)vp->v_vmdata;
35745749Smckusick 	if (pager == vm_pager_null)
35845749Smckusick 		return (TRUE);
35945749Smckusick 	/*
36045749Smckusick 	 * Unlock the vnode if it is currently locked.
36145749Smckusick 	 * We do this since uncaching the object may result
36245749Smckusick 	 * in its destruction which may initiate paging
36345749Smckusick 	 * activity which may necessitate locking the vnode.
36445749Smckusick 	 */
36545749Smckusick 	locked = VOP_ISLOCKED(vp);
36645749Smckusick 	if (locked)
36745749Smckusick 		VOP_UNLOCK(vp);
36845749Smckusick 	/*
36945749Smckusick 	 * Must use vm_object_lookup() as it actually removes
37045749Smckusick 	 * the object from the cache list.
37145749Smckusick 	 */
37245749Smckusick 	object = vm_object_lookup(pager);
37345749Smckusick 	if (object) {
37445749Smckusick 		uncached = (object->ref_count <= 1);
37545749Smckusick 		pager_cache(object, FALSE);
37645749Smckusick 	} else
37745749Smckusick 		uncached = TRUE;
37845749Smckusick 	if (locked)
37945749Smckusick 		VOP_LOCK(vp);
38045749Smckusick 	return(uncached);
38145749Smckusick }
38245749Smckusick 
38345749Smckusick vnode_pager_io(vnp, m, rw)
38445749Smckusick 	register vn_pager_t vnp;
38545749Smckusick 	vm_page_t m;
38645749Smckusick 	enum uio_rw rw;
38745749Smckusick {
38845749Smckusick 	struct uio auio;
38945749Smckusick 	struct iovec aiov;
39045749Smckusick 	vm_offset_t kva, foff;
39145749Smckusick 	int error, size;
39248042Smckusick 	struct proc *p = curproc;		/* XXX */
39345749Smckusick 
39445749Smckusick #ifdef DEBUG
39545749Smckusick 	if (vpagerdebug & VDB_FOLLOW)
39645749Smckusick 		printf("vnode_pager_io(%x, %x, %c): vnode %x\n",
39745749Smckusick 		       vnp, m, rw == UIO_READ ? 'R' : 'W', vnp->vnp_vp);
39845749Smckusick #endif
39945749Smckusick 	foff = m->offset + m->object->paging_offset;
40045749Smckusick 	/*
40145749Smckusick 	 * Return failure if beyond current EOF
40245749Smckusick 	 */
40345749Smckusick 	if (foff >= vnp->vnp_size) {
40445749Smckusick #ifdef DEBUG
40545749Smckusick 		if (vpagerdebug & VDB_SIZE)
40645749Smckusick 			printf("vnode_pager_io: vp %x, off %d size %d\n",
40745749Smckusick 			       vnp->vnp_vp, foff, vnp->vnp_size);
40845749Smckusick #endif
40945749Smckusick 		return(VM_PAGER_BAD);
41045749Smckusick 	}
41145749Smckusick 	if (foff + PAGE_SIZE > vnp->vnp_size)
41245749Smckusick 		size = vnp->vnp_size - foff;
41345749Smckusick 	else
41445749Smckusick 		size = PAGE_SIZE;
41545749Smckusick 	/*
41645749Smckusick 	 * Allocate a kernel virtual address and initialize so that
41745749Smckusick 	 * we can use VOP_READ/WRITE routines.
41845749Smckusick 	 */
41945749Smckusick 	kva = vm_pager_map_page(m);
42045749Smckusick 	aiov.iov_base = (caddr_t)kva;
42145749Smckusick 	aiov.iov_len = size;
42245749Smckusick 	auio.uio_iov = &aiov;
42345749Smckusick 	auio.uio_iovcnt = 1;
42445749Smckusick 	auio.uio_offset = foff;
42545749Smckusick 	auio.uio_segflg = UIO_SYSSPACE;
42645749Smckusick 	auio.uio_rw = rw;
42745749Smckusick 	auio.uio_resid = size;
42848042Smckusick 	auio.uio_procp = (struct proc *)0;
42945749Smckusick #ifdef DEBUG
43045749Smckusick 	if (vpagerdebug & VDB_IO)
43145749Smckusick 		printf("vnode_pager_io: vp %x kva %x foff %x size %x",
43245749Smckusick 		       vnp->vnp_vp, kva, foff, size);
43345749Smckusick #endif
43445749Smckusick 	if (rw == UIO_READ)
43548042Smckusick 		error = VOP_READ(vnp->vnp_vp, &auio, 0, p->p_ucred);
43645749Smckusick 	else
43748042Smckusick 		error = VOP_WRITE(vnp->vnp_vp, &auio, 0, p->p_ucred);
43845749Smckusick #ifdef DEBUG
43945749Smckusick 	if (vpagerdebug & VDB_IO) {
44045749Smckusick 		if (error || auio.uio_resid)
44145749Smckusick 			printf(" returns error %x, resid %x",
44245749Smckusick 			       error, auio.uio_resid);
44345749Smckusick 		printf("\n");
44445749Smckusick 	}
44545749Smckusick #endif
44645749Smckusick 	if (!error) {
44745749Smckusick 		register int count = size - auio.uio_resid;
44845749Smckusick 
44945749Smckusick 		if (count == 0)
45045749Smckusick 			error = EINVAL;
45145749Smckusick 		else if (count != PAGE_SIZE && rw == UIO_READ)
45245749Smckusick 			bzero(kva + count, PAGE_SIZE - count);
45345749Smckusick 	}
45445749Smckusick 	vm_pager_unmap_page(kva);
45545749Smckusick 	return (error ? VM_PAGER_FAIL : VM_PAGER_OK);
45645749Smckusick }
45745749Smckusick #endif
458