145749Smckusick /* 245749Smckusick * Copyright (c) 1990 University of Utah. 345749Smckusick * Copyright (c) 1991 The Regents of the University of California. 445749Smckusick * All rights reserved. 545749Smckusick * 645749Smckusick * This code is derived from software contributed to Berkeley by 745749Smckusick * the Systems Programming Group of the University of Utah Computer 845749Smckusick * Science Department. 945749Smckusick * 1045749Smckusick * %sccs.include.redist.c% 1145749Smckusick * 12*48362Smckusick * @(#)vnode_pager.c 7.4 (Berkeley) 04/19/91 1345749Smckusick */ 1445749Smckusick 1545749Smckusick /* 1645749Smckusick * Page to/from files (vnodes). 1745749Smckusick * 1845749Smckusick * TODO: 1945749Smckusick * pageouts 2047977Skarels * fix credential use (uses current process credentials now) 2145749Smckusick */ 2245749Smckusick #include "vnodepager.h" 2345749Smckusick #if NVNODEPAGER > 0 2445749Smckusick 2545749Smckusick #include "param.h" 2647977Skarels #include "proc.h" 2745749Smckusick #include "malloc.h" 2845749Smckusick #include "vnode.h" 2945749Smckusick #include "uio.h" 3045749Smckusick #include "mount.h" 3147977Skarels 3247977Skarels #include "vm_param.h" 3347977Skarels #include "lock.h" 3445749Smckusick #include "queue.h" 3547977Skarels #include "vm_prot.h" 3647977Skarels #include "vm_object.h" 3747977Skarels #include "vm_page.h" 3847977Skarels #include "vnode_pager.h" 3945749Smckusick 4045749Smckusick queue_head_t vnode_pager_list; /* list of managed vnodes */ 4145749Smckusick 4245749Smckusick #ifdef DEBUG 4345749Smckusick int vpagerdebug = 0x00; 4445749Smckusick #define VDB_FOLLOW 0x01 4545749Smckusick #define VDB_INIT 0x02 4645749Smckusick #define VDB_IO 0x04 4745749Smckusick #define VDB_FAIL 0x08 4845749Smckusick #define VDB_ALLOC 0x10 4945749Smckusick #define VDB_SIZE 0x20 5045749Smckusick #endif 5145749Smckusick 5245749Smckusick void 5345749Smckusick vnode_pager_init() 5445749Smckusick { 5545749Smckusick #ifdef DEBUG 5645749Smckusick if (vpagerdebug & VDB_FOLLOW) 5745749Smckusick printf("vnode_pager_init()\n"); 5845749Smckusick #endif 5945749Smckusick queue_init(&vnode_pager_list); 6045749Smckusick } 6145749Smckusick 6245749Smckusick /* 6345749Smckusick * Allocate (or lookup) pager for a vnode. 6445749Smckusick * Handle is a vnode pointer. 6545749Smckusick */ 6645749Smckusick vm_pager_t 6745749Smckusick vnode_pager_alloc(handle, size, prot) 6845749Smckusick caddr_t handle; 6945749Smckusick vm_size_t size; 7045749Smckusick vm_prot_t prot; 7145749Smckusick { 7245749Smckusick register vm_pager_t pager; 7345749Smckusick register vn_pager_t vnp; 7445749Smckusick vm_object_t object; 7545749Smckusick struct vattr vattr; 7645749Smckusick struct vnode *vp; 7748042Smckusick struct proc *p = curproc; /* XXX */ 7845749Smckusick 7945749Smckusick #ifdef DEBUG 8045749Smckusick if (vpagerdebug & (VDB_FOLLOW|VDB_ALLOC)) 8145749Smckusick printf("vnode_pager_alloc(%x, %x, %x)\n", handle, size, prot); 8245749Smckusick #endif 8345749Smckusick /* 8445749Smckusick * Pageout to vnode, no can do yet. 8545749Smckusick */ 8645749Smckusick if (handle == NULL) 8745749Smckusick return(VM_PAGER_NULL); 8845749Smckusick 8945749Smckusick /* 9045749Smckusick * Vnodes keep a pointer to any associated pager so no need to 9145749Smckusick * lookup with vm_pager_lookup. 9245749Smckusick */ 9345749Smckusick vp = (struct vnode *)handle; 9445749Smckusick pager = (vm_pager_t)vp->v_vmdata; 9545749Smckusick if (pager == VM_PAGER_NULL) { 9645749Smckusick /* 9745749Smckusick * Allocate pager structures 9845749Smckusick */ 9945749Smckusick pager = (vm_pager_t)malloc(sizeof *pager, M_VMPAGER, M_WAITOK); 10045749Smckusick if (pager == VM_PAGER_NULL) 10145749Smckusick return(VM_PAGER_NULL); 10245749Smckusick vnp = (vn_pager_t)malloc(sizeof *vnp, M_VMPGDATA, M_WAITOK); 10345749Smckusick if (vnp == VN_PAGER_NULL) { 10445749Smckusick free((caddr_t)pager, M_VMPAGER); 10545749Smckusick return(VM_PAGER_NULL); 10645749Smckusick } 10745749Smckusick /* 10845749Smckusick * And an object of the appropriate size 10945749Smckusick */ 11048042Smckusick if (VOP_GETATTR(vp, &vattr, p->p_ucred, p) == 0) { 11145749Smckusick object = vm_object_allocate(round_page(vattr.va_size)); 11245749Smckusick vm_object_enter(object, pager); 11345749Smckusick vm_object_setpager(object, pager, 0, TRUE); 11445749Smckusick } else { 11545749Smckusick free((caddr_t)vnp, M_VMPGDATA); 11645749Smckusick free((caddr_t)pager, M_VMPAGER); 11745749Smckusick return(VM_PAGER_NULL); 11845749Smckusick } 11945749Smckusick /* 12045749Smckusick * Hold a reference to the vnode and initialize pager data. 12145749Smckusick */ 12245749Smckusick VREF(vp); 12345749Smckusick vnp->vnp_flags = 0; 12445749Smckusick vnp->vnp_vp = vp; 12545749Smckusick vnp->vnp_size = vattr.va_size; 12645749Smckusick queue_enter(&vnode_pager_list, pager, vm_pager_t, pg_list); 12745749Smckusick pager->pg_handle = handle; 12845749Smckusick pager->pg_type = PG_VNODE; 12945749Smckusick pager->pg_ops = &vnodepagerops; 13045749Smckusick pager->pg_data = (caddr_t)vnp; 13145749Smckusick vp->v_vmdata = (caddr_t)pager; 13245749Smckusick } else { 13345749Smckusick /* 13445749Smckusick * vm_object_lookup() will remove the object from the 13545749Smckusick * cache if found and also gain a reference to the object. 13645749Smckusick */ 13745749Smckusick object = vm_object_lookup(pager); 13847977Skarels #ifdef DEBUG 13945749Smckusick vnp = (vn_pager_t)pager->pg_data; 14047977Skarels #endif 14145749Smckusick } 14245749Smckusick #ifdef DEBUG 14345749Smckusick if (vpagerdebug & VDB_ALLOC) 14445749Smckusick printf("vnode_pager_setup: vp %x sz %x pager %x object %x\n", 14545749Smckusick vp, vnp->vnp_size, pager, object); 14645749Smckusick #endif 14745749Smckusick return(pager); 14845749Smckusick } 14945749Smckusick 15045749Smckusick void 15145749Smckusick vnode_pager_dealloc(pager) 15245749Smckusick vm_pager_t pager; 15345749Smckusick { 15445749Smckusick register vn_pager_t vnp = (vn_pager_t)pager->pg_data; 15545749Smckusick register struct vnode *vp; 15648042Smckusick struct proc *p = curproc; /* XXX */ 15745749Smckusick 15845749Smckusick #ifdef DEBUG 15945749Smckusick if (vpagerdebug & VDB_FOLLOW) 16045749Smckusick printf("vnode_pager_dealloc(%x)\n", pager); 16145749Smckusick #endif 16245749Smckusick if (vp = vnp->vnp_vp) { 16345749Smckusick vp->v_vmdata = NULL; 16445749Smckusick vp->v_flag &= ~VTEXT; 16545749Smckusick #if 0 16645749Smckusick /* can hang if done at reboot on NFS FS */ 16748042Smckusick (void) VOP_FSYNC(vp, p->p_ucred, p); 16845749Smckusick #endif 16945749Smckusick vrele(vp); 17045749Smckusick } 17145749Smckusick queue_remove(&vnode_pager_list, pager, vm_pager_t, pg_list); 17245749Smckusick free((caddr_t)vnp, M_VMPGDATA); 17345749Smckusick free((caddr_t)pager, M_VMPAGER); 17445749Smckusick } 17545749Smckusick 17645749Smckusick vnode_pager_getpage(pager, m, sync) 17745749Smckusick vm_pager_t pager; 17845749Smckusick vm_page_t m; 17945749Smckusick boolean_t sync; 18045749Smckusick { 18145749Smckusick 18245749Smckusick #ifdef DEBUG 18345749Smckusick if (vpagerdebug & VDB_FOLLOW) 18445749Smckusick printf("vnode_pager_getpage(%x, %x)\n", pager, m); 18545749Smckusick #endif 18645749Smckusick return(vnode_pager_io((vn_pager_t)pager->pg_data, m, UIO_READ)); 18745749Smckusick } 18845749Smckusick 18945749Smckusick boolean_t 19045749Smckusick vnode_pager_putpage(pager, m, sync) 19145749Smckusick vm_pager_t pager; 19245749Smckusick vm_page_t m; 19345749Smckusick boolean_t sync; 19445749Smckusick { 19545749Smckusick int err; 19645749Smckusick 19745749Smckusick #ifdef DEBUG 19845749Smckusick if (vpagerdebug & VDB_FOLLOW) 19945749Smckusick printf("vnode_pager_putpage(%x, %x)\n", pager, m); 20045749Smckusick #endif 20145749Smckusick if (pager == VM_PAGER_NULL) 20245749Smckusick return; 20345749Smckusick err = vnode_pager_io((vn_pager_t)pager->pg_data, m, UIO_WRITE); 20445749Smckusick if (err == VM_PAGER_OK) { 20545749Smckusick m->clean = TRUE; /* XXX - wrong place */ 20645749Smckusick pmap_clear_modify(VM_PAGE_TO_PHYS(m)); /* XXX - wrong place */ 20745749Smckusick } 20845749Smckusick return(err); 20945749Smckusick } 21045749Smckusick 21145749Smckusick boolean_t 21245749Smckusick vnode_pager_haspage(pager, offset) 21345749Smckusick vm_pager_t pager; 21445749Smckusick vm_offset_t offset; 21545749Smckusick { 21645749Smckusick register vn_pager_t vnp = (vn_pager_t)pager->pg_data; 21745749Smckusick daddr_t bn; 21845749Smckusick int err; 21945749Smckusick 22045749Smckusick #ifdef DEBUG 22145749Smckusick if (vpagerdebug & VDB_FOLLOW) 22245749Smckusick printf("vnode_pager_haspage(%x, %x)\n", pager, offset); 22345749Smckusick #endif 22445749Smckusick 22545749Smckusick /* 22645749Smckusick * Offset beyond end of file, do not have the page 22745749Smckusick */ 22845749Smckusick if (offset >= vnp->vnp_size) { 22945749Smckusick #ifdef DEBUG 23045749Smckusick if (vpagerdebug & (VDB_FAIL|VDB_SIZE)) 23145749Smckusick printf("vnode_pager_haspage: pg %x, off %x, size %x\n", 23245749Smckusick pager, offset, vnp->vnp_size); 23345749Smckusick #endif 23445749Smckusick return(FALSE); 23545749Smckusick } 23645749Smckusick 23745749Smckusick /* 23845749Smckusick * Read the index to find the disk block to read 23945749Smckusick * from. If there is no block, report that we don't 24045749Smckusick * have this data. 24145749Smckusick * 24245749Smckusick * Assumes that the vnode has whole page or nothing. 24345749Smckusick */ 24445749Smckusick err = VOP_BMAP(vnp->vnp_vp, 24545749Smckusick offset / vnp->vnp_vp->v_mount->mnt_stat.f_bsize, 246*48362Smckusick (struct vnode **)0, &bn); 24745749Smckusick if (err) { 24845749Smckusick #ifdef DEBUG 24945749Smckusick if (vpagerdebug & VDB_FAIL) 25045749Smckusick printf("vnode_pager_haspage: BMAP err %d, pg %x, off %x\n", 25145749Smckusick err, pager, offset); 25245749Smckusick #endif 25345749Smckusick return(TRUE); 25445749Smckusick } 25545749Smckusick return((long)bn < 0 ? FALSE : TRUE); 25645749Smckusick } 25745749Smckusick 25845749Smckusick /* 25945749Smckusick * (XXX) 26045749Smckusick * Lets the VM system know about a change in size for a file. 26145749Smckusick * If this vnode is mapped into some address space (i.e. we have a pager 26245749Smckusick * for it) we adjust our own internal size and flush any cached pages in 26345749Smckusick * the associated object that are affected by the size change. 26445749Smckusick * 26545749Smckusick * Note: this routine may be invoked as a result of a pager put 26645749Smckusick * operation (possibly at object termination time), so we must be careful. 26745749Smckusick */ 26845749Smckusick vnode_pager_setsize(vp, nsize) 26945749Smckusick struct vnode *vp; 27045749Smckusick u_long nsize; 27145749Smckusick { 27245749Smckusick register vn_pager_t vnp; 27345749Smckusick register vm_object_t object; 27445749Smckusick vm_pager_t pager; 27545749Smckusick 27645749Smckusick /* 27745749Smckusick * Not a mapped vnode 27845749Smckusick */ 27945749Smckusick if (vp == NULL || vp->v_type != VREG || vp->v_vmdata == NULL) 28045749Smckusick return; 28145749Smckusick /* 28245749Smckusick * Hasn't changed size 28345749Smckusick */ 28445749Smckusick pager = (vm_pager_t)vp->v_vmdata; 28545749Smckusick vnp = (vn_pager_t)pager->pg_data; 28645749Smckusick if (nsize == vnp->vnp_size) 28745749Smckusick return; 28845749Smckusick /* 28945749Smckusick * No object. 29045749Smckusick * This can happen during object termination since 29145749Smckusick * vm_object_page_clean is called after the object 29245749Smckusick * has been removed from the hash table, and clean 29345749Smckusick * may cause vnode write operations which can wind 29445749Smckusick * up back here. 29545749Smckusick */ 29645749Smckusick object = vm_object_lookup(pager); 29745749Smckusick if (object == VM_OBJECT_NULL) 29845749Smckusick return; 29945749Smckusick 30045749Smckusick #ifdef DEBUG 30145749Smckusick if (vpagerdebug & (VDB_FOLLOW|VDB_SIZE)) 30245749Smckusick printf("vnode_pager_setsize: vp %x obj %x osz %d nsz %d\n", 30345749Smckusick vp, object, vnp->vnp_size, nsize); 30445749Smckusick #endif 30545749Smckusick /* 30645749Smckusick * File has shrunk. 30745749Smckusick * Toss any cached pages beyond the new EOF. 30845749Smckusick */ 30945749Smckusick if (nsize < vnp->vnp_size) { 31045749Smckusick vm_object_lock(object); 31145749Smckusick vm_object_page_remove(object, 31245749Smckusick (vm_offset_t)nsize, vnp->vnp_size); 31345749Smckusick vm_object_unlock(object); 31445749Smckusick } 31545749Smckusick vnp->vnp_size = (vm_offset_t)nsize; 31645749Smckusick vm_object_deallocate(object); 31745749Smckusick } 31845749Smckusick 31945749Smckusick vnode_pager_umount(mp) 32045749Smckusick register struct mount *mp; 32145749Smckusick { 32245749Smckusick register vm_pager_t pager, npager; 32345749Smckusick struct vnode *vp; 32445749Smckusick 32545749Smckusick pager = (vm_pager_t) queue_first(&vnode_pager_list); 32645749Smckusick while (!queue_end(&vnode_pager_list, (queue_entry_t)pager)) { 32745749Smckusick /* 32845749Smckusick * Save the next pointer now since uncaching may 32945749Smckusick * terminate the object and render pager invalid 33045749Smckusick */ 33145749Smckusick vp = ((vn_pager_t)pager->pg_data)->vnp_vp; 33245749Smckusick npager = (vm_pager_t) queue_next(&pager->pg_list); 33345749Smckusick if (mp == (struct mount *)0 || vp->v_mount == mp) 33445749Smckusick (void) vnode_pager_uncache(vp); 33545749Smckusick pager = npager; 33645749Smckusick } 33745749Smckusick } 33845749Smckusick 33945749Smckusick /* 34045749Smckusick * Remove vnode associated object from the object cache. 34145749Smckusick * 34245749Smckusick * Note: this routine may be invoked as a result of a pager put 34345749Smckusick * operation (possibly at object termination time), so we must be careful. 34445749Smckusick */ 34545749Smckusick boolean_t 34645749Smckusick vnode_pager_uncache(vp) 34745749Smckusick register struct vnode *vp; 34845749Smckusick { 34945749Smckusick register vm_object_t object; 35045749Smckusick boolean_t uncached, locked; 35145749Smckusick vm_pager_t pager; 35245749Smckusick 35345749Smckusick /* 35445749Smckusick * Not a mapped vnode 35545749Smckusick */ 35645749Smckusick pager = (vm_pager_t)vp->v_vmdata; 35745749Smckusick if (pager == vm_pager_null) 35845749Smckusick return (TRUE); 35945749Smckusick /* 36045749Smckusick * Unlock the vnode if it is currently locked. 36145749Smckusick * We do this since uncaching the object may result 36245749Smckusick * in its destruction which may initiate paging 36345749Smckusick * activity which may necessitate locking the vnode. 36445749Smckusick */ 36545749Smckusick locked = VOP_ISLOCKED(vp); 36645749Smckusick if (locked) 36745749Smckusick VOP_UNLOCK(vp); 36845749Smckusick /* 36945749Smckusick * Must use vm_object_lookup() as it actually removes 37045749Smckusick * the object from the cache list. 37145749Smckusick */ 37245749Smckusick object = vm_object_lookup(pager); 37345749Smckusick if (object) { 37445749Smckusick uncached = (object->ref_count <= 1); 37545749Smckusick pager_cache(object, FALSE); 37645749Smckusick } else 37745749Smckusick uncached = TRUE; 37845749Smckusick if (locked) 37945749Smckusick VOP_LOCK(vp); 38045749Smckusick return(uncached); 38145749Smckusick } 38245749Smckusick 38345749Smckusick vnode_pager_io(vnp, m, rw) 38445749Smckusick register vn_pager_t vnp; 38545749Smckusick vm_page_t m; 38645749Smckusick enum uio_rw rw; 38745749Smckusick { 38845749Smckusick struct uio auio; 38945749Smckusick struct iovec aiov; 39045749Smckusick vm_offset_t kva, foff; 39145749Smckusick int error, size; 39248042Smckusick struct proc *p = curproc; /* XXX */ 39345749Smckusick 39445749Smckusick #ifdef DEBUG 39545749Smckusick if (vpagerdebug & VDB_FOLLOW) 39645749Smckusick printf("vnode_pager_io(%x, %x, %c): vnode %x\n", 39745749Smckusick vnp, m, rw == UIO_READ ? 'R' : 'W', vnp->vnp_vp); 39845749Smckusick #endif 39945749Smckusick foff = m->offset + m->object->paging_offset; 40045749Smckusick /* 40145749Smckusick * Return failure if beyond current EOF 40245749Smckusick */ 40345749Smckusick if (foff >= vnp->vnp_size) { 40445749Smckusick #ifdef DEBUG 40545749Smckusick if (vpagerdebug & VDB_SIZE) 40645749Smckusick printf("vnode_pager_io: vp %x, off %d size %d\n", 40745749Smckusick vnp->vnp_vp, foff, vnp->vnp_size); 40845749Smckusick #endif 40945749Smckusick return(VM_PAGER_BAD); 41045749Smckusick } 41145749Smckusick if (foff + PAGE_SIZE > vnp->vnp_size) 41245749Smckusick size = vnp->vnp_size - foff; 41345749Smckusick else 41445749Smckusick size = PAGE_SIZE; 41545749Smckusick /* 41645749Smckusick * Allocate a kernel virtual address and initialize so that 41745749Smckusick * we can use VOP_READ/WRITE routines. 41845749Smckusick */ 41945749Smckusick kva = vm_pager_map_page(m); 42045749Smckusick aiov.iov_base = (caddr_t)kva; 42145749Smckusick aiov.iov_len = size; 42245749Smckusick auio.uio_iov = &aiov; 42345749Smckusick auio.uio_iovcnt = 1; 42445749Smckusick auio.uio_offset = foff; 42545749Smckusick auio.uio_segflg = UIO_SYSSPACE; 42645749Smckusick auio.uio_rw = rw; 42745749Smckusick auio.uio_resid = size; 42848042Smckusick auio.uio_procp = (struct proc *)0; 42945749Smckusick #ifdef DEBUG 43045749Smckusick if (vpagerdebug & VDB_IO) 43145749Smckusick printf("vnode_pager_io: vp %x kva %x foff %x size %x", 43245749Smckusick vnp->vnp_vp, kva, foff, size); 43345749Smckusick #endif 43445749Smckusick if (rw == UIO_READ) 43548042Smckusick error = VOP_READ(vnp->vnp_vp, &auio, 0, p->p_ucred); 43645749Smckusick else 43748042Smckusick error = VOP_WRITE(vnp->vnp_vp, &auio, 0, p->p_ucred); 43845749Smckusick #ifdef DEBUG 43945749Smckusick if (vpagerdebug & VDB_IO) { 44045749Smckusick if (error || auio.uio_resid) 44145749Smckusick printf(" returns error %x, resid %x", 44245749Smckusick error, auio.uio_resid); 44345749Smckusick printf("\n"); 44445749Smckusick } 44545749Smckusick #endif 44645749Smckusick if (!error) { 44745749Smckusick register int count = size - auio.uio_resid; 44845749Smckusick 44945749Smckusick if (count == 0) 45045749Smckusick error = EINVAL; 45145749Smckusick else if (count != PAGE_SIZE && rw == UIO_READ) 45245749Smckusick bzero(kva + count, PAGE_SIZE - count); 45345749Smckusick } 45445749Smckusick vm_pager_unmap_page(kva); 45545749Smckusick return (error ? VM_PAGER_FAIL : VM_PAGER_OK); 45645749Smckusick } 45745749Smckusick #endif 458