1*45749Smckusick /* 2*45749Smckusick * Copyright (c) 1990 University of Utah. 3*45749Smckusick * Copyright (c) 1991 The Regents of the University of California. 4*45749Smckusick * All rights reserved. 5*45749Smckusick * 6*45749Smckusick * This code is derived from software contributed to Berkeley by 7*45749Smckusick * the Systems Programming Group of the University of Utah Computer 8*45749Smckusick * Science Department. 9*45749Smckusick * 10*45749Smckusick * %sccs.include.redist.c% 11*45749Smckusick * 12*45749Smckusick * @(#)vnode_pager.c 7.1 (Berkeley) 12/05/90 13*45749Smckusick */ 14*45749Smckusick 15*45749Smckusick /* 16*45749Smckusick * Page to/from files (vnodes). 17*45749Smckusick * 18*45749Smckusick * TODO: 19*45749Smckusick * pageouts 20*45749Smckusick */ 21*45749Smckusick #include "vnodepager.h" 22*45749Smckusick #if NVNODEPAGER > 0 23*45749Smckusick 24*45749Smckusick #include "param.h" 25*45749Smckusick #include "user.h" 26*45749Smckusick #include "malloc.h" 27*45749Smckusick #include "vnode.h" 28*45749Smckusick #include "uio.h" 29*45749Smckusick #include "mount.h" 30*45749Smckusick #include "queue.h" 31*45749Smckusick 32*45749Smckusick #include "../vm/vm_param.h" 33*45749Smckusick #include "../vm/vm_pager.h" 34*45749Smckusick #include "../vm/vm_page.h" 35*45749Smckusick #include "../vm/vnode_pager.h" 36*45749Smckusick 37*45749Smckusick queue_head_t vnode_pager_list; /* list of managed vnodes */ 38*45749Smckusick 39*45749Smckusick #ifdef DEBUG 40*45749Smckusick int vpagerdebug = 0x00; 41*45749Smckusick #define VDB_FOLLOW 0x01 42*45749Smckusick #define VDB_INIT 0x02 43*45749Smckusick #define VDB_IO 0x04 44*45749Smckusick #define VDB_FAIL 0x08 45*45749Smckusick #define VDB_ALLOC 0x10 46*45749Smckusick #define VDB_SIZE 0x20 47*45749Smckusick #endif 48*45749Smckusick 49*45749Smckusick void 50*45749Smckusick vnode_pager_init() 51*45749Smckusick { 52*45749Smckusick #ifdef DEBUG 53*45749Smckusick if (vpagerdebug & VDB_FOLLOW) 54*45749Smckusick printf("vnode_pager_init()\n"); 55*45749Smckusick #endif 56*45749Smckusick queue_init(&vnode_pager_list); 57*45749Smckusick } 58*45749Smckusick 59*45749Smckusick /* 60*45749Smckusick * Allocate (or lookup) pager for a vnode. 61*45749Smckusick * Handle is a vnode pointer. 62*45749Smckusick */ 63*45749Smckusick vm_pager_t 64*45749Smckusick vnode_pager_alloc(handle, size, prot) 65*45749Smckusick caddr_t handle; 66*45749Smckusick vm_size_t size; 67*45749Smckusick vm_prot_t prot; 68*45749Smckusick { 69*45749Smckusick register vm_pager_t pager; 70*45749Smckusick register vn_pager_t vnp; 71*45749Smckusick vm_object_t object; 72*45749Smckusick struct vattr vattr; 73*45749Smckusick struct vnode *vp; 74*45749Smckusick 75*45749Smckusick #ifdef DEBUG 76*45749Smckusick if (vpagerdebug & (VDB_FOLLOW|VDB_ALLOC)) 77*45749Smckusick printf("vnode_pager_alloc(%x, %x, %x)\n", handle, size, prot); 78*45749Smckusick #endif 79*45749Smckusick /* 80*45749Smckusick * Pageout to vnode, no can do yet. 81*45749Smckusick */ 82*45749Smckusick if (handle == NULL) 83*45749Smckusick return(VM_PAGER_NULL); 84*45749Smckusick 85*45749Smckusick /* 86*45749Smckusick * Vnodes keep a pointer to any associated pager so no need to 87*45749Smckusick * lookup with vm_pager_lookup. 88*45749Smckusick */ 89*45749Smckusick vp = (struct vnode *)handle; 90*45749Smckusick pager = (vm_pager_t)vp->v_vmdata; 91*45749Smckusick if (pager == VM_PAGER_NULL) { 92*45749Smckusick /* 93*45749Smckusick * Allocate pager structures 94*45749Smckusick */ 95*45749Smckusick pager = (vm_pager_t)malloc(sizeof *pager, M_VMPAGER, M_WAITOK); 96*45749Smckusick if (pager == VM_PAGER_NULL) 97*45749Smckusick return(VM_PAGER_NULL); 98*45749Smckusick vnp = (vn_pager_t)malloc(sizeof *vnp, M_VMPGDATA, M_WAITOK); 99*45749Smckusick if (vnp == VN_PAGER_NULL) { 100*45749Smckusick free((caddr_t)pager, M_VMPAGER); 101*45749Smckusick return(VM_PAGER_NULL); 102*45749Smckusick } 103*45749Smckusick /* 104*45749Smckusick * And an object of the appropriate size 105*45749Smckusick */ 106*45749Smckusick if (VOP_GETATTR(vp, &vattr, u.u_cred) == 0) { 107*45749Smckusick object = vm_object_allocate(round_page(vattr.va_size)); 108*45749Smckusick vm_object_enter(object, pager); 109*45749Smckusick vm_object_setpager(object, pager, 0, TRUE); 110*45749Smckusick } else { 111*45749Smckusick free((caddr_t)vnp, M_VMPGDATA); 112*45749Smckusick free((caddr_t)pager, M_VMPAGER); 113*45749Smckusick return(VM_PAGER_NULL); 114*45749Smckusick } 115*45749Smckusick /* 116*45749Smckusick * Hold a reference to the vnode and initialize pager data. 117*45749Smckusick */ 118*45749Smckusick VREF(vp); 119*45749Smckusick vnp->vnp_flags = 0; 120*45749Smckusick vnp->vnp_vp = vp; 121*45749Smckusick vnp->vnp_size = vattr.va_size; 122*45749Smckusick queue_enter(&vnode_pager_list, pager, vm_pager_t, pg_list); 123*45749Smckusick pager->pg_handle = handle; 124*45749Smckusick pager->pg_type = PG_VNODE; 125*45749Smckusick pager->pg_ops = &vnodepagerops; 126*45749Smckusick pager->pg_data = (caddr_t)vnp; 127*45749Smckusick vp->v_vmdata = (caddr_t)pager; 128*45749Smckusick } else { 129*45749Smckusick /* 130*45749Smckusick * vm_object_lookup() will remove the object from the 131*45749Smckusick * cache if found and also gain a reference to the object. 132*45749Smckusick */ 133*45749Smckusick object = vm_object_lookup(pager); 134*45749Smckusick vnp = (vn_pager_t)pager->pg_data; 135*45749Smckusick } 136*45749Smckusick if (prot & VM_PROT_EXECUTE) 137*45749Smckusick vp->v_flag |= VTEXT; /* XXX */ 138*45749Smckusick #ifdef DEBUG 139*45749Smckusick if (vpagerdebug & VDB_ALLOC) 140*45749Smckusick printf("vnode_pager_setup: vp %x sz %x pager %x object %x\n", 141*45749Smckusick vp, vnp->vnp_size, pager, object); 142*45749Smckusick #endif 143*45749Smckusick return(pager); 144*45749Smckusick } 145*45749Smckusick 146*45749Smckusick void 147*45749Smckusick vnode_pager_dealloc(pager) 148*45749Smckusick vm_pager_t pager; 149*45749Smckusick { 150*45749Smckusick register vn_pager_t vnp = (vn_pager_t)pager->pg_data; 151*45749Smckusick register struct vnode *vp; 152*45749Smckusick 153*45749Smckusick #ifdef DEBUG 154*45749Smckusick if (vpagerdebug & VDB_FOLLOW) 155*45749Smckusick printf("vnode_pager_dealloc(%x)\n", pager); 156*45749Smckusick #endif 157*45749Smckusick if (vp = vnp->vnp_vp) { 158*45749Smckusick vp->v_vmdata = NULL; 159*45749Smckusick vp->v_flag &= ~VTEXT; 160*45749Smckusick #if 0 161*45749Smckusick /* can hang if done at reboot on NFS FS */ 162*45749Smckusick (void) VOP_FSYNC(vp, u.u_cred); 163*45749Smckusick #endif 164*45749Smckusick vrele(vp); 165*45749Smckusick } 166*45749Smckusick queue_remove(&vnode_pager_list, pager, vm_pager_t, pg_list); 167*45749Smckusick free((caddr_t)vnp, M_VMPGDATA); 168*45749Smckusick free((caddr_t)pager, M_VMPAGER); 169*45749Smckusick } 170*45749Smckusick 171*45749Smckusick vnode_pager_getpage(pager, m, sync) 172*45749Smckusick vm_pager_t pager; 173*45749Smckusick vm_page_t m; 174*45749Smckusick boolean_t sync; 175*45749Smckusick { 176*45749Smckusick 177*45749Smckusick #ifdef DEBUG 178*45749Smckusick if (vpagerdebug & VDB_FOLLOW) 179*45749Smckusick printf("vnode_pager_getpage(%x, %x)\n", pager, m); 180*45749Smckusick #endif 181*45749Smckusick return(vnode_pager_io((vn_pager_t)pager->pg_data, m, UIO_READ)); 182*45749Smckusick } 183*45749Smckusick 184*45749Smckusick boolean_t 185*45749Smckusick vnode_pager_putpage(pager, m, sync) 186*45749Smckusick vm_pager_t pager; 187*45749Smckusick vm_page_t m; 188*45749Smckusick boolean_t sync; 189*45749Smckusick { 190*45749Smckusick int err; 191*45749Smckusick 192*45749Smckusick #ifdef DEBUG 193*45749Smckusick if (vpagerdebug & VDB_FOLLOW) 194*45749Smckusick printf("vnode_pager_putpage(%x, %x)\n", pager, m); 195*45749Smckusick #endif 196*45749Smckusick if (pager == VM_PAGER_NULL) 197*45749Smckusick return; 198*45749Smckusick err = vnode_pager_io((vn_pager_t)pager->pg_data, m, UIO_WRITE); 199*45749Smckusick if (err == VM_PAGER_OK) { 200*45749Smckusick m->clean = TRUE; /* XXX - wrong place */ 201*45749Smckusick pmap_clear_modify(VM_PAGE_TO_PHYS(m)); /* XXX - wrong place */ 202*45749Smckusick } 203*45749Smckusick return(err); 204*45749Smckusick } 205*45749Smckusick 206*45749Smckusick boolean_t 207*45749Smckusick vnode_pager_haspage(pager, offset) 208*45749Smckusick vm_pager_t pager; 209*45749Smckusick vm_offset_t offset; 210*45749Smckusick { 211*45749Smckusick register vn_pager_t vnp = (vn_pager_t)pager->pg_data; 212*45749Smckusick daddr_t bn; 213*45749Smckusick int err; 214*45749Smckusick 215*45749Smckusick #ifdef DEBUG 216*45749Smckusick if (vpagerdebug & VDB_FOLLOW) 217*45749Smckusick printf("vnode_pager_haspage(%x, %x)\n", pager, offset); 218*45749Smckusick #endif 219*45749Smckusick 220*45749Smckusick /* 221*45749Smckusick * Offset beyond end of file, do not have the page 222*45749Smckusick */ 223*45749Smckusick if (offset >= vnp->vnp_size) { 224*45749Smckusick #ifdef DEBUG 225*45749Smckusick if (vpagerdebug & (VDB_FAIL|VDB_SIZE)) 226*45749Smckusick printf("vnode_pager_haspage: pg %x, off %x, size %x\n", 227*45749Smckusick pager, offset, vnp->vnp_size); 228*45749Smckusick #endif 229*45749Smckusick return(FALSE); 230*45749Smckusick } 231*45749Smckusick 232*45749Smckusick /* 233*45749Smckusick * Read the index to find the disk block to read 234*45749Smckusick * from. If there is no block, report that we don't 235*45749Smckusick * have this data. 236*45749Smckusick * 237*45749Smckusick * Assumes that the vnode has whole page or nothing. 238*45749Smckusick */ 239*45749Smckusick err = VOP_BMAP(vnp->vnp_vp, 240*45749Smckusick offset / vnp->vnp_vp->v_mount->mnt_stat.f_bsize, 241*45749Smckusick (struct vnode *)0, &bn); 242*45749Smckusick if (err) { 243*45749Smckusick #ifdef DEBUG 244*45749Smckusick if (vpagerdebug & VDB_FAIL) 245*45749Smckusick printf("vnode_pager_haspage: BMAP err %d, pg %x, off %x\n", 246*45749Smckusick err, pager, offset); 247*45749Smckusick #endif 248*45749Smckusick return(TRUE); 249*45749Smckusick } 250*45749Smckusick return((long)bn < 0 ? FALSE : TRUE); 251*45749Smckusick } 252*45749Smckusick 253*45749Smckusick /* 254*45749Smckusick * (XXX) 255*45749Smckusick * Lets the VM system know about a change in size for a file. 256*45749Smckusick * If this vnode is mapped into some address space (i.e. we have a pager 257*45749Smckusick * for it) we adjust our own internal size and flush any cached pages in 258*45749Smckusick * the associated object that are affected by the size change. 259*45749Smckusick * 260*45749Smckusick * Note: this routine may be invoked as a result of a pager put 261*45749Smckusick * operation (possibly at object termination time), so we must be careful. 262*45749Smckusick */ 263*45749Smckusick vnode_pager_setsize(vp, nsize) 264*45749Smckusick struct vnode *vp; 265*45749Smckusick u_long nsize; 266*45749Smckusick { 267*45749Smckusick register vn_pager_t vnp; 268*45749Smckusick register vm_object_t object; 269*45749Smckusick vm_pager_t pager; 270*45749Smckusick 271*45749Smckusick /* 272*45749Smckusick * Not a mapped vnode 273*45749Smckusick */ 274*45749Smckusick if (vp == NULL || vp->v_type != VREG || vp->v_vmdata == NULL) 275*45749Smckusick return; 276*45749Smckusick /* 277*45749Smckusick * Hasn't changed size 278*45749Smckusick */ 279*45749Smckusick pager = (vm_pager_t)vp->v_vmdata; 280*45749Smckusick vnp = (vn_pager_t)pager->pg_data; 281*45749Smckusick if (nsize == vnp->vnp_size) 282*45749Smckusick return; 283*45749Smckusick /* 284*45749Smckusick * No object. 285*45749Smckusick * This can happen during object termination since 286*45749Smckusick * vm_object_page_clean is called after the object 287*45749Smckusick * has been removed from the hash table, and clean 288*45749Smckusick * may cause vnode write operations which can wind 289*45749Smckusick * up back here. 290*45749Smckusick */ 291*45749Smckusick object = vm_object_lookup(pager); 292*45749Smckusick if (object == VM_OBJECT_NULL) 293*45749Smckusick return; 294*45749Smckusick 295*45749Smckusick #ifdef DEBUG 296*45749Smckusick if (vpagerdebug & (VDB_FOLLOW|VDB_SIZE)) 297*45749Smckusick printf("vnode_pager_setsize: vp %x obj %x osz %d nsz %d\n", 298*45749Smckusick vp, object, vnp->vnp_size, nsize); 299*45749Smckusick #endif 300*45749Smckusick /* 301*45749Smckusick * File has shrunk. 302*45749Smckusick * Toss any cached pages beyond the new EOF. 303*45749Smckusick */ 304*45749Smckusick if (nsize < vnp->vnp_size) { 305*45749Smckusick vm_object_lock(object); 306*45749Smckusick vm_object_page_remove(object, 307*45749Smckusick (vm_offset_t)nsize, vnp->vnp_size); 308*45749Smckusick vm_object_unlock(object); 309*45749Smckusick } 310*45749Smckusick vnp->vnp_size = (vm_offset_t)nsize; 311*45749Smckusick vm_object_deallocate(object); 312*45749Smckusick } 313*45749Smckusick 314*45749Smckusick vnode_pager_umount(mp) 315*45749Smckusick register struct mount *mp; 316*45749Smckusick { 317*45749Smckusick register vm_pager_t pager, npager; 318*45749Smckusick struct vnode *vp; 319*45749Smckusick 320*45749Smckusick pager = (vm_pager_t) queue_first(&vnode_pager_list); 321*45749Smckusick while (!queue_end(&vnode_pager_list, (queue_entry_t)pager)) { 322*45749Smckusick /* 323*45749Smckusick * Save the next pointer now since uncaching may 324*45749Smckusick * terminate the object and render pager invalid 325*45749Smckusick */ 326*45749Smckusick vp = ((vn_pager_t)pager->pg_data)->vnp_vp; 327*45749Smckusick npager = (vm_pager_t) queue_next(&pager->pg_list); 328*45749Smckusick if (mp == (struct mount *)0 || vp->v_mount == mp) 329*45749Smckusick (void) vnode_pager_uncache(vp); 330*45749Smckusick pager = npager; 331*45749Smckusick } 332*45749Smckusick } 333*45749Smckusick 334*45749Smckusick /* 335*45749Smckusick * Remove vnode associated object from the object cache. 336*45749Smckusick * 337*45749Smckusick * Note: this routine may be invoked as a result of a pager put 338*45749Smckusick * operation (possibly at object termination time), so we must be careful. 339*45749Smckusick */ 340*45749Smckusick boolean_t 341*45749Smckusick vnode_pager_uncache(vp) 342*45749Smckusick register struct vnode *vp; 343*45749Smckusick { 344*45749Smckusick register vm_object_t object; 345*45749Smckusick boolean_t uncached, locked; 346*45749Smckusick vm_pager_t pager; 347*45749Smckusick 348*45749Smckusick /* 349*45749Smckusick * Not a mapped vnode 350*45749Smckusick */ 351*45749Smckusick pager = (vm_pager_t)vp->v_vmdata; 352*45749Smckusick if (pager == vm_pager_null) 353*45749Smckusick return (TRUE); 354*45749Smckusick /* 355*45749Smckusick * Unlock the vnode if it is currently locked. 356*45749Smckusick * We do this since uncaching the object may result 357*45749Smckusick * in its destruction which may initiate paging 358*45749Smckusick * activity which may necessitate locking the vnode. 359*45749Smckusick */ 360*45749Smckusick locked = VOP_ISLOCKED(vp); 361*45749Smckusick if (locked) 362*45749Smckusick VOP_UNLOCK(vp); 363*45749Smckusick /* 364*45749Smckusick * Must use vm_object_lookup() as it actually removes 365*45749Smckusick * the object from the cache list. 366*45749Smckusick */ 367*45749Smckusick object = vm_object_lookup(pager); 368*45749Smckusick if (object) { 369*45749Smckusick uncached = (object->ref_count <= 1); 370*45749Smckusick pager_cache(object, FALSE); 371*45749Smckusick } else 372*45749Smckusick uncached = TRUE; 373*45749Smckusick if (locked) 374*45749Smckusick VOP_LOCK(vp); 375*45749Smckusick return(uncached); 376*45749Smckusick } 377*45749Smckusick 378*45749Smckusick vnode_pager_io(vnp, m, rw) 379*45749Smckusick register vn_pager_t vnp; 380*45749Smckusick vm_page_t m; 381*45749Smckusick enum uio_rw rw; 382*45749Smckusick { 383*45749Smckusick struct uio auio; 384*45749Smckusick struct iovec aiov; 385*45749Smckusick vm_offset_t kva, foff; 386*45749Smckusick int error, size; 387*45749Smckusick 388*45749Smckusick #ifdef DEBUG 389*45749Smckusick if (vpagerdebug & VDB_FOLLOW) 390*45749Smckusick printf("vnode_pager_io(%x, %x, %c): vnode %x\n", 391*45749Smckusick vnp, m, rw == UIO_READ ? 'R' : 'W', vnp->vnp_vp); 392*45749Smckusick #endif 393*45749Smckusick foff = m->offset + m->object->paging_offset; 394*45749Smckusick /* 395*45749Smckusick * Return failure if beyond current EOF 396*45749Smckusick */ 397*45749Smckusick if (foff >= vnp->vnp_size) { 398*45749Smckusick #ifdef DEBUG 399*45749Smckusick if (vpagerdebug & VDB_SIZE) 400*45749Smckusick printf("vnode_pager_io: vp %x, off %d size %d\n", 401*45749Smckusick vnp->vnp_vp, foff, vnp->vnp_size); 402*45749Smckusick #endif 403*45749Smckusick return(VM_PAGER_BAD); 404*45749Smckusick } 405*45749Smckusick if (foff + PAGE_SIZE > vnp->vnp_size) 406*45749Smckusick size = vnp->vnp_size - foff; 407*45749Smckusick else 408*45749Smckusick size = PAGE_SIZE; 409*45749Smckusick /* 410*45749Smckusick * Allocate a kernel virtual address and initialize so that 411*45749Smckusick * we can use VOP_READ/WRITE routines. 412*45749Smckusick */ 413*45749Smckusick kva = vm_pager_map_page(m); 414*45749Smckusick aiov.iov_base = (caddr_t)kva; 415*45749Smckusick aiov.iov_len = size; 416*45749Smckusick auio.uio_iov = &aiov; 417*45749Smckusick auio.uio_iovcnt = 1; 418*45749Smckusick auio.uio_offset = foff; 419*45749Smckusick auio.uio_segflg = UIO_SYSSPACE; 420*45749Smckusick auio.uio_rw = rw; 421*45749Smckusick auio.uio_resid = size; 422*45749Smckusick #ifdef DEBUG 423*45749Smckusick if (vpagerdebug & VDB_IO) 424*45749Smckusick printf("vnode_pager_io: vp %x kva %x foff %x size %x", 425*45749Smckusick vnp->vnp_vp, kva, foff, size); 426*45749Smckusick #endif 427*45749Smckusick if (rw == UIO_READ) 428*45749Smckusick error = VOP_READ(vnp->vnp_vp, &auio, 0, u.u_cred); 429*45749Smckusick else 430*45749Smckusick error = VOP_WRITE(vnp->vnp_vp, &auio, 0, u.u_cred); 431*45749Smckusick #ifdef DEBUG 432*45749Smckusick if (vpagerdebug & VDB_IO) { 433*45749Smckusick if (error || auio.uio_resid) 434*45749Smckusick printf(" returns error %x, resid %x", 435*45749Smckusick error, auio.uio_resid); 436*45749Smckusick printf("\n"); 437*45749Smckusick } 438*45749Smckusick #endif 439*45749Smckusick if (!error) { 440*45749Smckusick register int count = size - auio.uio_resid; 441*45749Smckusick 442*45749Smckusick if (count == 0) 443*45749Smckusick error = EINVAL; 444*45749Smckusick else if (count != PAGE_SIZE && rw == UIO_READ) 445*45749Smckusick bzero(kva + count, PAGE_SIZE - count); 446*45749Smckusick } 447*45749Smckusick vm_pager_unmap_page(kva); 448*45749Smckusick return (error ? VM_PAGER_FAIL : VM_PAGER_OK); 449*45749Smckusick } 450*45749Smckusick #endif 451