145749Smckusick /* 245749Smckusick * Copyright (c) 1990 University of Utah. 345749Smckusick * Copyright (c) 1991 The Regents of the University of California. 445749Smckusick * All rights reserved. 545749Smckusick * 645749Smckusick * This code is derived from software contributed to Berkeley by 745749Smckusick * the Systems Programming Group of the University of Utah Computer 845749Smckusick * Science Department. 945749Smckusick * 1045749Smckusick * %sccs.include.redist.c% 1145749Smckusick * 12*53480Smckusick * @(#)vnode_pager.c 7.9 (Berkeley) 05/13/92 1345749Smckusick */ 1445749Smckusick 1545749Smckusick /* 1645749Smckusick * Page to/from files (vnodes). 1745749Smckusick * 1845749Smckusick * TODO: 1945749Smckusick * pageouts 2047977Skarels * fix credential use (uses current process credentials now) 2145749Smckusick */ 2245749Smckusick #include "vnodepager.h" 2345749Smckusick #if NVNODEPAGER > 0 2445749Smckusick 2553342Sbostic #include <sys/param.h> 2653342Sbostic #include <sys/systm.h> 2753342Sbostic #include <sys/proc.h> 2853342Sbostic #include <sys/malloc.h> 2953342Sbostic #include <sys/vnode.h> 3053342Sbostic #include <sys/uio.h> 3153342Sbostic #include <sys/mount.h> 3247977Skarels 3353342Sbostic #include <vm/vm.h> 3453342Sbostic #include <vm/vm_page.h> 3553342Sbostic #include <vm/vnode_pager.h> 3645749Smckusick 3745749Smckusick queue_head_t vnode_pager_list; /* list of managed vnodes */ 3845749Smckusick 3945749Smckusick #ifdef DEBUG 4045749Smckusick int vpagerdebug = 0x00; 4145749Smckusick #define VDB_FOLLOW 0x01 4245749Smckusick #define VDB_INIT 0x02 4345749Smckusick #define VDB_IO 0x04 4445749Smckusick #define VDB_FAIL 0x08 4545749Smckusick #define VDB_ALLOC 0x10 4645749Smckusick #define VDB_SIZE 0x20 4745749Smckusick #endif 4845749Smckusick 4953342Sbostic static vm_pager_t vnode_pager_alloc __P((caddr_t, vm_size_t, vm_prot_t)); 5053342Sbostic static void vnode_pager_dealloc __P((vm_pager_t)); 5153342Sbostic static int vnode_pager_getpage 5253342Sbostic __P((vm_pager_t, vm_page_t, boolean_t)); 5353342Sbostic static boolean_t vnode_pager_haspage __P((vm_pager_t, vm_offset_t)); 5453342Sbostic static void vnode_pager_init __P((void)); 5553342Sbostic static int vnode_pager_io 5653342Sbostic __P((vn_pager_t, vm_page_t, enum uio_rw)); 5753342Sbostic static boolean_t vnode_pager_putpage 5853342Sbostic __P((vm_pager_t, vm_page_t, boolean_t)); 5953342Sbostic 6053342Sbostic struct pagerops vnodepagerops = { 6153342Sbostic vnode_pager_init, 6253342Sbostic vnode_pager_alloc, 6353342Sbostic vnode_pager_dealloc, 6453342Sbostic vnode_pager_getpage, 6553342Sbostic vnode_pager_putpage, 6653342Sbostic vnode_pager_haspage 6753342Sbostic }; 6853342Sbostic 6953342Sbostic static void 7045749Smckusick vnode_pager_init() 7145749Smckusick { 7245749Smckusick #ifdef DEBUG 7345749Smckusick if (vpagerdebug & VDB_FOLLOW) 7445749Smckusick printf("vnode_pager_init()\n"); 7545749Smckusick #endif 7645749Smckusick queue_init(&vnode_pager_list); 7745749Smckusick } 7845749Smckusick 7945749Smckusick /* 8045749Smckusick * Allocate (or lookup) pager for a vnode. 8145749Smckusick * Handle is a vnode pointer. 8245749Smckusick */ 8353342Sbostic static vm_pager_t 8445749Smckusick vnode_pager_alloc(handle, size, prot) 8545749Smckusick caddr_t handle; 8645749Smckusick vm_size_t size; 8745749Smckusick vm_prot_t prot; 8845749Smckusick { 8945749Smckusick register vm_pager_t pager; 9045749Smckusick register vn_pager_t vnp; 9145749Smckusick vm_object_t object; 9245749Smckusick struct vattr vattr; 9345749Smckusick struct vnode *vp; 9448042Smckusick struct proc *p = curproc; /* XXX */ 9545749Smckusick 9645749Smckusick #ifdef DEBUG 9745749Smckusick if (vpagerdebug & (VDB_FOLLOW|VDB_ALLOC)) 9845749Smckusick printf("vnode_pager_alloc(%x, %x, %x)\n", handle, size, prot); 9945749Smckusick #endif 10045749Smckusick /* 10145749Smckusick * Pageout to vnode, no can do yet. 10245749Smckusick */ 10345749Smckusick if (handle == NULL) 10448397Skarels return(NULL); 10545749Smckusick 10645749Smckusick /* 10745749Smckusick * Vnodes keep a pointer to any associated pager so no need to 10845749Smckusick * lookup with vm_pager_lookup. 10945749Smckusick */ 11045749Smckusick vp = (struct vnode *)handle; 11145749Smckusick pager = (vm_pager_t)vp->v_vmdata; 11248397Skarels if (pager == NULL) { 11345749Smckusick /* 11445749Smckusick * Allocate pager structures 11545749Smckusick */ 11645749Smckusick pager = (vm_pager_t)malloc(sizeof *pager, M_VMPAGER, M_WAITOK); 11748397Skarels if (pager == NULL) 11848397Skarels return(NULL); 11945749Smckusick vnp = (vn_pager_t)malloc(sizeof *vnp, M_VMPGDATA, M_WAITOK); 12048397Skarels if (vnp == NULL) { 12145749Smckusick free((caddr_t)pager, M_VMPAGER); 12248397Skarels return(NULL); 12345749Smckusick } 12445749Smckusick /* 12545749Smckusick * And an object of the appropriate size 12645749Smckusick */ 12748042Smckusick if (VOP_GETATTR(vp, &vattr, p->p_ucred, p) == 0) { 12845749Smckusick object = vm_object_allocate(round_page(vattr.va_size)); 12945749Smckusick vm_object_enter(object, pager); 13045749Smckusick vm_object_setpager(object, pager, 0, TRUE); 13145749Smckusick } else { 13245749Smckusick free((caddr_t)vnp, M_VMPGDATA); 13345749Smckusick free((caddr_t)pager, M_VMPAGER); 13448397Skarels return(NULL); 13545749Smckusick } 13645749Smckusick /* 13745749Smckusick * Hold a reference to the vnode and initialize pager data. 13845749Smckusick */ 13945749Smckusick VREF(vp); 14045749Smckusick vnp->vnp_flags = 0; 14145749Smckusick vnp->vnp_vp = vp; 14245749Smckusick vnp->vnp_size = vattr.va_size; 14345749Smckusick queue_enter(&vnode_pager_list, pager, vm_pager_t, pg_list); 14445749Smckusick pager->pg_handle = handle; 14545749Smckusick pager->pg_type = PG_VNODE; 14645749Smckusick pager->pg_ops = &vnodepagerops; 14745749Smckusick pager->pg_data = (caddr_t)vnp; 14845749Smckusick vp->v_vmdata = (caddr_t)pager; 14945749Smckusick } else { 15045749Smckusick /* 15145749Smckusick * vm_object_lookup() will remove the object from the 15245749Smckusick * cache if found and also gain a reference to the object. 15345749Smckusick */ 15445749Smckusick object = vm_object_lookup(pager); 15547977Skarels #ifdef DEBUG 15645749Smckusick vnp = (vn_pager_t)pager->pg_data; 15747977Skarels #endif 15845749Smckusick } 15945749Smckusick #ifdef DEBUG 16045749Smckusick if (vpagerdebug & VDB_ALLOC) 16145749Smckusick printf("vnode_pager_setup: vp %x sz %x pager %x object %x\n", 16245749Smckusick vp, vnp->vnp_size, pager, object); 16345749Smckusick #endif 16445749Smckusick return(pager); 16545749Smckusick } 16645749Smckusick 16753342Sbostic static void 16845749Smckusick vnode_pager_dealloc(pager) 16945749Smckusick vm_pager_t pager; 17045749Smckusick { 17145749Smckusick register vn_pager_t vnp = (vn_pager_t)pager->pg_data; 17245749Smckusick register struct vnode *vp; 17348042Smckusick struct proc *p = curproc; /* XXX */ 17445749Smckusick 17545749Smckusick #ifdef DEBUG 17645749Smckusick if (vpagerdebug & VDB_FOLLOW) 17745749Smckusick printf("vnode_pager_dealloc(%x)\n", pager); 17845749Smckusick #endif 17945749Smckusick if (vp = vnp->vnp_vp) { 18045749Smckusick vp->v_vmdata = NULL; 18145749Smckusick vp->v_flag &= ~VTEXT; 18245749Smckusick #if 0 18345749Smckusick /* can hang if done at reboot on NFS FS */ 18448042Smckusick (void) VOP_FSYNC(vp, p->p_ucred, p); 18545749Smckusick #endif 18645749Smckusick vrele(vp); 18745749Smckusick } 18845749Smckusick queue_remove(&vnode_pager_list, pager, vm_pager_t, pg_list); 18945749Smckusick free((caddr_t)vnp, M_VMPGDATA); 19045749Smckusick free((caddr_t)pager, M_VMPAGER); 19145749Smckusick } 19245749Smckusick 19353342Sbostic static int 19445749Smckusick vnode_pager_getpage(pager, m, sync) 19545749Smckusick vm_pager_t pager; 19645749Smckusick vm_page_t m; 19745749Smckusick boolean_t sync; 19845749Smckusick { 19945749Smckusick 20045749Smckusick #ifdef DEBUG 20145749Smckusick if (vpagerdebug & VDB_FOLLOW) 20245749Smckusick printf("vnode_pager_getpage(%x, %x)\n", pager, m); 20345749Smckusick #endif 20445749Smckusick return(vnode_pager_io((vn_pager_t)pager->pg_data, m, UIO_READ)); 20545749Smckusick } 20645749Smckusick 20753342Sbostic static boolean_t 20845749Smckusick vnode_pager_putpage(pager, m, sync) 20945749Smckusick vm_pager_t pager; 21045749Smckusick vm_page_t m; 21145749Smckusick boolean_t sync; 21245749Smckusick { 21345749Smckusick int err; 21445749Smckusick 21545749Smckusick #ifdef DEBUG 21645749Smckusick if (vpagerdebug & VDB_FOLLOW) 21745749Smckusick printf("vnode_pager_putpage(%x, %x)\n", pager, m); 21845749Smckusick #endif 21948397Skarels if (pager == NULL) 22045749Smckusick return; 22145749Smckusick err = vnode_pager_io((vn_pager_t)pager->pg_data, m, UIO_WRITE); 22245749Smckusick if (err == VM_PAGER_OK) { 22345749Smckusick m->clean = TRUE; /* XXX - wrong place */ 22445749Smckusick pmap_clear_modify(VM_PAGE_TO_PHYS(m)); /* XXX - wrong place */ 22545749Smckusick } 22645749Smckusick return(err); 22745749Smckusick } 22845749Smckusick 22953342Sbostic static boolean_t 23045749Smckusick vnode_pager_haspage(pager, offset) 23145749Smckusick vm_pager_t pager; 23245749Smckusick vm_offset_t offset; 23345749Smckusick { 23445749Smckusick register vn_pager_t vnp = (vn_pager_t)pager->pg_data; 23545749Smckusick daddr_t bn; 23645749Smckusick int err; 23745749Smckusick 23845749Smckusick #ifdef DEBUG 23945749Smckusick if (vpagerdebug & VDB_FOLLOW) 24045749Smckusick printf("vnode_pager_haspage(%x, %x)\n", pager, offset); 24145749Smckusick #endif 24245749Smckusick 24345749Smckusick /* 24445749Smckusick * Offset beyond end of file, do not have the page 24545749Smckusick */ 24645749Smckusick if (offset >= vnp->vnp_size) { 24745749Smckusick #ifdef DEBUG 24845749Smckusick if (vpagerdebug & (VDB_FAIL|VDB_SIZE)) 24945749Smckusick printf("vnode_pager_haspage: pg %x, off %x, size %x\n", 25045749Smckusick pager, offset, vnp->vnp_size); 25145749Smckusick #endif 25245749Smckusick return(FALSE); 25345749Smckusick } 25445749Smckusick 25545749Smckusick /* 25645749Smckusick * Read the index to find the disk block to read 25745749Smckusick * from. If there is no block, report that we don't 25845749Smckusick * have this data. 25945749Smckusick * 26045749Smckusick * Assumes that the vnode has whole page or nothing. 26145749Smckusick */ 26245749Smckusick err = VOP_BMAP(vnp->vnp_vp, 26351941Smckusick offset / vnp->vnp_vp->v_mount->mnt_stat.f_iosize, 26448362Smckusick (struct vnode **)0, &bn); 26545749Smckusick if (err) { 26645749Smckusick #ifdef DEBUG 26745749Smckusick if (vpagerdebug & VDB_FAIL) 26845749Smckusick printf("vnode_pager_haspage: BMAP err %d, pg %x, off %x\n", 26945749Smckusick err, pager, offset); 27045749Smckusick #endif 27145749Smckusick return(TRUE); 27245749Smckusick } 27345749Smckusick return((long)bn < 0 ? FALSE : TRUE); 27445749Smckusick } 27545749Smckusick 27645749Smckusick /* 27745749Smckusick * (XXX) 27845749Smckusick * Lets the VM system know about a change in size for a file. 27945749Smckusick * If this vnode is mapped into some address space (i.e. we have a pager 28045749Smckusick * for it) we adjust our own internal size and flush any cached pages in 28145749Smckusick * the associated object that are affected by the size change. 28245749Smckusick * 28345749Smckusick * Note: this routine may be invoked as a result of a pager put 28445749Smckusick * operation (possibly at object termination time), so we must be careful. 28545749Smckusick */ 286*53480Smckusick void 28745749Smckusick vnode_pager_setsize(vp, nsize) 28845749Smckusick struct vnode *vp; 28945749Smckusick u_long nsize; 29045749Smckusick { 29145749Smckusick register vn_pager_t vnp; 29245749Smckusick register vm_object_t object; 29345749Smckusick vm_pager_t pager; 29445749Smckusick 29545749Smckusick /* 29645749Smckusick * Not a mapped vnode 29745749Smckusick */ 29845749Smckusick if (vp == NULL || vp->v_type != VREG || vp->v_vmdata == NULL) 29945749Smckusick return; 30045749Smckusick /* 30145749Smckusick * Hasn't changed size 30245749Smckusick */ 30345749Smckusick pager = (vm_pager_t)vp->v_vmdata; 30445749Smckusick vnp = (vn_pager_t)pager->pg_data; 30545749Smckusick if (nsize == vnp->vnp_size) 30645749Smckusick return; 30745749Smckusick /* 30845749Smckusick * No object. 30945749Smckusick * This can happen during object termination since 31045749Smckusick * vm_object_page_clean is called after the object 31145749Smckusick * has been removed from the hash table, and clean 31245749Smckusick * may cause vnode write operations which can wind 31345749Smckusick * up back here. 31445749Smckusick */ 31545749Smckusick object = vm_object_lookup(pager); 31648397Skarels if (object == NULL) 31745749Smckusick return; 31845749Smckusick 31945749Smckusick #ifdef DEBUG 32045749Smckusick if (vpagerdebug & (VDB_FOLLOW|VDB_SIZE)) 32145749Smckusick printf("vnode_pager_setsize: vp %x obj %x osz %d nsz %d\n", 32245749Smckusick vp, object, vnp->vnp_size, nsize); 32345749Smckusick #endif 32445749Smckusick /* 32545749Smckusick * File has shrunk. 32645749Smckusick * Toss any cached pages beyond the new EOF. 32745749Smckusick */ 32845749Smckusick if (nsize < vnp->vnp_size) { 32945749Smckusick vm_object_lock(object); 33045749Smckusick vm_object_page_remove(object, 33145749Smckusick (vm_offset_t)nsize, vnp->vnp_size); 33245749Smckusick vm_object_unlock(object); 33345749Smckusick } 33445749Smckusick vnp->vnp_size = (vm_offset_t)nsize; 33545749Smckusick vm_object_deallocate(object); 33645749Smckusick } 33745749Smckusick 338*53480Smckusick void 33945749Smckusick vnode_pager_umount(mp) 34045749Smckusick register struct mount *mp; 34145749Smckusick { 34245749Smckusick register vm_pager_t pager, npager; 34345749Smckusick struct vnode *vp; 34445749Smckusick 34545749Smckusick pager = (vm_pager_t) queue_first(&vnode_pager_list); 34645749Smckusick while (!queue_end(&vnode_pager_list, (queue_entry_t)pager)) { 34745749Smckusick /* 34845749Smckusick * Save the next pointer now since uncaching may 34945749Smckusick * terminate the object and render pager invalid 35045749Smckusick */ 35145749Smckusick vp = ((vn_pager_t)pager->pg_data)->vnp_vp; 35245749Smckusick npager = (vm_pager_t) queue_next(&pager->pg_list); 35345749Smckusick if (mp == (struct mount *)0 || vp->v_mount == mp) 35445749Smckusick (void) vnode_pager_uncache(vp); 35545749Smckusick pager = npager; 35645749Smckusick } 35745749Smckusick } 35845749Smckusick 35945749Smckusick /* 36045749Smckusick * Remove vnode associated object from the object cache. 36145749Smckusick * 36245749Smckusick * Note: this routine may be invoked as a result of a pager put 36345749Smckusick * operation (possibly at object termination time), so we must be careful. 36445749Smckusick */ 365*53480Smckusick boolean_t 36645749Smckusick vnode_pager_uncache(vp) 36745749Smckusick register struct vnode *vp; 36845749Smckusick { 36945749Smckusick register vm_object_t object; 37045749Smckusick boolean_t uncached, locked; 37145749Smckusick vm_pager_t pager; 37245749Smckusick 37345749Smckusick /* 37445749Smckusick * Not a mapped vnode 37545749Smckusick */ 37645749Smckusick pager = (vm_pager_t)vp->v_vmdata; 37748397Skarels if (pager == NULL) 37845749Smckusick return (TRUE); 37945749Smckusick /* 38045749Smckusick * Unlock the vnode if it is currently locked. 38145749Smckusick * We do this since uncaching the object may result 38245749Smckusick * in its destruction which may initiate paging 38345749Smckusick * activity which may necessitate locking the vnode. 38445749Smckusick */ 38545749Smckusick locked = VOP_ISLOCKED(vp); 38645749Smckusick if (locked) 38745749Smckusick VOP_UNLOCK(vp); 38845749Smckusick /* 38945749Smckusick * Must use vm_object_lookup() as it actually removes 39045749Smckusick * the object from the cache list. 39145749Smckusick */ 39245749Smckusick object = vm_object_lookup(pager); 39345749Smckusick if (object) { 39445749Smckusick uncached = (object->ref_count <= 1); 39545749Smckusick pager_cache(object, FALSE); 39645749Smckusick } else 39745749Smckusick uncached = TRUE; 39845749Smckusick if (locked) 39945749Smckusick VOP_LOCK(vp); 40045749Smckusick return(uncached); 40145749Smckusick } 40245749Smckusick 40353342Sbostic static int 40445749Smckusick vnode_pager_io(vnp, m, rw) 40545749Smckusick register vn_pager_t vnp; 40645749Smckusick vm_page_t m; 40745749Smckusick enum uio_rw rw; 40845749Smckusick { 40945749Smckusick struct uio auio; 41045749Smckusick struct iovec aiov; 41145749Smckusick vm_offset_t kva, foff; 41245749Smckusick int error, size; 41348042Smckusick struct proc *p = curproc; /* XXX */ 41445749Smckusick 41545749Smckusick #ifdef DEBUG 41645749Smckusick if (vpagerdebug & VDB_FOLLOW) 41745749Smckusick printf("vnode_pager_io(%x, %x, %c): vnode %x\n", 41845749Smckusick vnp, m, rw == UIO_READ ? 'R' : 'W', vnp->vnp_vp); 41945749Smckusick #endif 42045749Smckusick foff = m->offset + m->object->paging_offset; 42145749Smckusick /* 42245749Smckusick * Return failure if beyond current EOF 42345749Smckusick */ 42445749Smckusick if (foff >= vnp->vnp_size) { 42545749Smckusick #ifdef DEBUG 42645749Smckusick if (vpagerdebug & VDB_SIZE) 42745749Smckusick printf("vnode_pager_io: vp %x, off %d size %d\n", 42845749Smckusick vnp->vnp_vp, foff, vnp->vnp_size); 42945749Smckusick #endif 43045749Smckusick return(VM_PAGER_BAD); 43145749Smckusick } 43245749Smckusick if (foff + PAGE_SIZE > vnp->vnp_size) 43345749Smckusick size = vnp->vnp_size - foff; 43445749Smckusick else 43545749Smckusick size = PAGE_SIZE; 43645749Smckusick /* 43745749Smckusick * Allocate a kernel virtual address and initialize so that 43845749Smckusick * we can use VOP_READ/WRITE routines. 43945749Smckusick */ 44045749Smckusick kva = vm_pager_map_page(m); 44145749Smckusick aiov.iov_base = (caddr_t)kva; 44245749Smckusick aiov.iov_len = size; 44345749Smckusick auio.uio_iov = &aiov; 44445749Smckusick auio.uio_iovcnt = 1; 44545749Smckusick auio.uio_offset = foff; 44645749Smckusick auio.uio_segflg = UIO_SYSSPACE; 44745749Smckusick auio.uio_rw = rw; 44845749Smckusick auio.uio_resid = size; 44948042Smckusick auio.uio_procp = (struct proc *)0; 45045749Smckusick #ifdef DEBUG 45145749Smckusick if (vpagerdebug & VDB_IO) 45245749Smckusick printf("vnode_pager_io: vp %x kva %x foff %x size %x", 45345749Smckusick vnp->vnp_vp, kva, foff, size); 45445749Smckusick #endif 45545749Smckusick if (rw == UIO_READ) 45648042Smckusick error = VOP_READ(vnp->vnp_vp, &auio, 0, p->p_ucred); 45745749Smckusick else 45848042Smckusick error = VOP_WRITE(vnp->vnp_vp, &auio, 0, p->p_ucred); 45945749Smckusick #ifdef DEBUG 46045749Smckusick if (vpagerdebug & VDB_IO) { 46145749Smckusick if (error || auio.uio_resid) 46245749Smckusick printf(" returns error %x, resid %x", 46345749Smckusick error, auio.uio_resid); 46445749Smckusick printf("\n"); 46545749Smckusick } 46645749Smckusick #endif 46745749Smckusick if (!error) { 46845749Smckusick register int count = size - auio.uio_resid; 46945749Smckusick 47045749Smckusick if (count == 0) 47145749Smckusick error = EINVAL; 47245749Smckusick else if (count != PAGE_SIZE && rw == UIO_READ) 47353342Sbostic bzero((void *)(kva + count), PAGE_SIZE - count); 47445749Smckusick } 47545749Smckusick vm_pager_unmap_page(kva); 47645749Smckusick return (error ? VM_PAGER_FAIL : VM_PAGER_OK); 47745749Smckusick } 47845749Smckusick #endif 479