/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */


/*
 * VM - generic vnode page mapping interfaces.
 *
 * Mechanism to provide temporary mappings to vnode pages.
 * The typical use would be to copy/access file data.
 */
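/*
 * Illustrative usage sketch (not taken verbatim from any caller;
 * vp, off, nbytes and uio are assumed to come from the caller's
 * context):
 *
 *	vmap_t vml[MINVMAPS];
 *	int error;
 *
 *	error = vpm_map_pages(vp, off, nbytes, 1, vml, MINVMAPS,
 *	    NULL, S_READ);
 *	if (error == 0) {
 *		error = uiomove(vml[0].vs_addr + (off & PAGEOFFSET),
 *		    nbytes, UIO_READ, uio);
 *		vpm_unmap_pages(vml, S_READ);
 *	}
 *
 * vpm_data_copy() below wraps this map/copy/unmap sequence for the
 * common copy case.
 */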
#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/buf.h>
#include <sys/systm.h>
#include <sys/vnode.h>
#include <sys/mman.h>
#include <sys/errno.h>
#include <sys/cred.h>
#include <sys/kmem.h>
#include <sys/vtrace.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/thread.h>
#include <sys/dumphdr.h>
#include <sys/bitmap.h>
#include <sys/lgrp.h>

#include <vm/seg_kmem.h>
#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_kpm.h>
#include <vm/seg_map.h>
#include <vm/page.h>
#include <vm/pvn.h>
#include <vm/rm.h>
#include <vm/vpm.h>


#ifdef	SEGKPM_SUPPORT
/*
 * VPM can be disabled by setting vpm_enable = 0 in
 * /etc/system.
 */
int vpm_enable = 1;

#else

int vpm_enable = 0;

#endif

#ifdef	SEGKPM_SUPPORT


int	vpm_cache_enable = 1;
long	vpm_cache_percent = 12;
long	vpm_cache_size;
int	vpm_nfreelist = 0;
int	vpmd_freemsk = 0;

#define	VPM_S_PAD	64
union vpm_cpu {
	struct {
		int	vcpu_free_ndx;
		ulong_t	vcpu_hits;
		ulong_t	vcpu_misses;
	} vcpu;
	char vpm_pad[VPM_S_PAD];
};
static union vpm_cpu	*vpmd_cpu;

#define	vfree_ndx	vcpu.vcpu_free_ndx

int	vpm_cachemode = VPMCACHE_LRU;

#define	PPMTX(pp)	(&(pp)->p_ilock)

static struct vpmap *vpmd_vpmap;	/* list of vpmap structs preallocated */
static struct vpmfree *vpmd_free;
#define	VPMAPMTX(vpm)	(&vpm->vpm_mtx)
#define	VPMAP2VMF(vpm)	(&vpmd_free[(vpm - vpmd_vpmap) & vpmd_freemsk])
#define	VPMAP2VMF_NDX(vpm)	(ushort_t)((vpm - vpmd_vpmap) & vpmd_freemsk)
#define	VPMP(id)	(&vpmd_vpmap[id - 1])
#define	VPMID(vpm)	(uint_t)((vpm - vpmd_vpmap) + 1)


#ifdef	DEBUG

struct vpm_debug {
	int vpmd_steals;
	int vpmd_contend;
	int vpmd_prevpagelocked;
	int vpmd_getpagefailed;
	int vpmd_zerostart;
	int vpmd_emptyfreelist;
	int vpmd_nofreevpms;
} vpm_debug;

#define	VPM_DEBUG(x)	((vpm_debug.x)++)

int	steals;
int	steals_mtbf = 7;
int	contend;
int	contend_mtbf = 127;

#define	VPM_MTBF(v, f)	(((++(v)) & (f)) != (f))

#else	/* DEBUG */

#define	VPM_MTBF(v, f)	(1)
#define	VPM_DEBUG(x)	/* nothing */

#endif

/*
 * The vpm cache.
 *
 * The main purpose of having a cache here is to speed up page_lookup()
 * operations and also provide an LRU(default) behaviour of file pages. The
 * page_lookup() operation tends to be expensive if a page has to be
 * reclaimed from the system page cache("cachelist"). Once we speed up the
 * page_lookup()->page_reclaim() path then there should be no need for
 * this cache. The system page cache(cachelist) should effectively serve the
 * purpose of caching file pages.
 *
 * This cache is very similar to segmap's smap cache. Each page in the
 * cache is tracked by the structure vpmap_t. But unlike segmap, there is no
 * hash table. The page_t has a reference to the vpmap_t when cached. For a
 * given vnode, offset the page is found by means of a page_lookup() operation.
 * Any page which has a mapping(i.e when cached) will not be in the
 * system 'cachelist'. Hence the page_lookup() will not have to do a
 * page_reclaim(). That is how the cache serves to speed up page_lookup()
 * operations.
 *
 * This cache can be disabled by setting vpm_cache_enable = 0 in /etc/system.
 */
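/*
 * For example, the tunables above can be set via /etc/system lines
 * such as the following (values are illustrative only):
 *
 *	set vpm_cache_enable = 0
 *	set vpm_cache_percent = 20
 *	set vpm_nfreelist = 8
 */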
void
vpm_init()
{
	long  npages;
	struct vpmap *vpm;
	struct vpmfree *vpmflp;
	int i, ndx;
	extern void prefetch_smap_w(void *);

	if (!kpm_enable) {
		vpm_enable = 0;
	}

	if (!vpm_enable || !vpm_cache_enable) {
		return;
	}

	/*
	 * Set the size of the cache.
	 */
	vpm_cache_size = mmu_ptob((physmem * vpm_cache_percent)/100);
	if (vpm_cache_size < VPMAP_MINCACHE) {
		vpm_cache_size = VPMAP_MINCACHE;
	}

	if (vpm_cache_size > VPMAP_MAXCACHE) {
		vpm_cache_size = VPMAP_MAXCACHE;
	}

	/*
	 * Number of freelists.
	 */
	if (vpm_nfreelist == 0) {
		vpm_nfreelist = max_ncpus;
	} else if (vpm_nfreelist < 0 || vpm_nfreelist > 2 * max_ncpus) {
		cmn_err(CE_WARN, "vpmap create : number of freelist "
		    "vpm_nfreelist %d using %d", vpm_nfreelist,
		    2 * max_ncpus);
		vpm_nfreelist = 2 * max_ncpus;
	}

	/*
	 * Round it up to the next power of 2
	 */
	if (vpm_nfreelist & (vpm_nfreelist - 1)) {
		vpm_nfreelist = 1 << (highbit(vpm_nfreelist));
	}
	vpmd_freemsk = vpm_nfreelist - 1;
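	/*
	 * Worked example of the rounding above (illustrative): a
	 * setting of vpm_nfreelist = 6 is not a power of 2, so with
	 * highbit(6) == 3 it is rounded up to 1 << 3 == 8, and
	 * vpmd_freemsk becomes 7.
	 */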

	/*
	 * Use a per cpu rotor index to spread the allocations evenly
	 * across the available vpm freelists.
	 */
	vpmd_cpu = kmem_zalloc(sizeof (union vpm_cpu) * max_ncpus, KM_SLEEP);
	ndx = 0;
	for (i = 0; i < max_ncpus; i++) {

		vpmd_cpu[i].vfree_ndx = ndx;
		ndx = (ndx + 1) & vpmd_freemsk;
	}

	/*
	 * Allocate and initialize the freelist.
	 */
	vpmd_free = kmem_zalloc(vpm_nfreelist * sizeof (struct vpmfree),
	    KM_SLEEP);
	for (i = 0; i < vpm_nfreelist; i++) {

		vpmflp = &vpmd_free[i];
		/*
		 * Set up initial queue pointers. They will get flipped
		 * back and forth.
		 */
		vpmflp->vpm_allocq = &vpmflp->vpm_freeq[VPMALLOCQ];
		vpmflp->vpm_releq = &vpmflp->vpm_freeq[VPMRELEQ];
	}

	npages = mmu_btop(vpm_cache_size);


	/*
	 * Allocate and initialize the vpmap structs. We need to
	 * walk the array backwards as the prefetch happens in reverse
	 * order.
	 */
	vpmd_vpmap = kmem_alloc(sizeof (struct vpmap) * npages, KM_SLEEP);
	for (vpm = &vpmd_vpmap[npages - 1]; vpm >= vpmd_vpmap; vpm--) {
		struct vpmfree *vpmflp;
		union vpm_freeq *releq;
		struct vpmap *vpmapf;

		/*
		 * Use prefetch as we have to walk thru a large number of
		 * these data structures. We just use the smap's prefetch
		 * routine as it does the same.
		 */
		prefetch_smap_w((void *)vpm);

		vpm->vpm_vp = NULL;
		vpm->vpm_off = 0;
		vpm->vpm_pp = NULL;
		vpm->vpm_refcnt = 0;
		mutex_init(&vpm->vpm_mtx, NULL, MUTEX_DEFAULT, NULL);
		vpm->vpm_free_ndx = VPMAP2VMF_NDX(vpm);

		vpmflp = VPMAP2VMF(vpm);
		releq = vpmflp->vpm_releq;

		vpmapf = releq->vpmq_free;
		if (vpmapf == NULL) {
			releq->vpmq_free = vpm->vpm_next = vpm->vpm_prev = vpm;
		} else {
			vpm->vpm_next = vpmapf;
			vpm->vpm_prev = vpmapf->vpm_prev;
			vpmapf->vpm_prev = vpm;
			vpm->vpm_prev->vpm_next = vpm;
			releq->vpmq_free = vpm->vpm_next;
		}

		/*
		 * Indicate that the vpmap is on the releq at start
		 */
		vpm->vpm_ndxflg = VPMRELEQ;
	}
}


/*
 * unhooks vpm from the freelist if it is still on the freelist.
 */
#define	VPMAP_RMFREELIST(vpm) \
	{ \
		if (vpm->vpm_next != NULL) { \
			union vpm_freeq *freeq; \
			struct vpmfree *vpmflp; \
			vpmflp = &vpmd_free[vpm->vpm_free_ndx]; \
			freeq = &vpmflp->vpm_freeq[vpm->vpm_ndxflg]; \
			mutex_enter(&freeq->vpmq_mtx); \
			if (freeq->vpmq_free != vpm) { \
				vpm->vpm_prev->vpm_next = vpm->vpm_next; \
				vpm->vpm_next->vpm_prev = vpm->vpm_prev; \
			} else if (vpm == vpm->vpm_next) { \
				freeq->vpmq_free = NULL; \
			} else { \
				freeq->vpmq_free = vpm->vpm_next; \
				vpm->vpm_prev->vpm_next = vpm->vpm_next; \
				vpm->vpm_next->vpm_prev = vpm->vpm_prev; \
			} \
			mutex_exit(&freeq->vpmq_mtx); \
			vpm->vpm_next = vpm->vpm_prev = NULL; \
		} \
	}

static int
get_freelndx(int mode)
{
	int ndx;

	ndx = vpmd_cpu[CPU->cpu_seqid].vfree_ndx & vpmd_freemsk;
	switch (mode) {

	case VPMCACHE_LRU:
	default:
		vpmd_cpu[CPU->cpu_seqid].vfree_ndx++;
		break;
	}
	return (ndx);
}
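
/*
 * Illustrative note on the index math used by VPMAP2VMF()/VPMID():
 * with vpm_nfreelist = 8 (vpmd_freemsk = 7), the vpmap at index 13 of
 * vpmd_vpmap[] belongs to freelist 13 & 7 == 5. VPMID()/VPMP()
 * translate between a vpmap and its id using index + 1, so that a
 * p_vpmref of 0 can mean "no vpmap associated with the page".
 */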

/*
 * Find one vpmap structure from the free lists and use it for the newpage.
 * The previous page it cached is dissociated and released. The page_t's
 * p_vpmref is cleared only when the vpm it is pointing to is locked (or,
 * for AMD64, when the page is exclusively locked in page_unload; that is
 * because the p_vpmref is treated as a mapping).
 *
 * The page's p_vpmref is set when the page is
 * locked (at least SHARED locked).
 */
static struct vpmap *
get_free_vpmap(page_t *newpage)
{
	struct vpmfree *vpmflp;
	kmutex_t *vmtx;
	struct vpmap *vpm, *first;
	union vpm_freeq *allocq, *releq;
	page_t *pp = NULL;
	int end_ndx, page_locked = 0;
	int free_ndx;

	/*
	 * get the freelist bin index.
	 */
	free_ndx = get_freelndx(vpm_cachemode);

	end_ndx = free_ndx;
	vpmflp = &vpmd_free[free_ndx];

retry_queue:
	allocq = vpmflp->vpm_allocq;
	mutex_enter(&allocq->vpmq_mtx);

	if ((vpm = allocq->vpmq_free) == NULL) {

skip_queue:
		/*
		 * The alloc list is empty or this queue is being skipped;
		 * first see if the allocq toggled.
		 */
		if (vpmflp->vpm_allocq != allocq) {
			/* queue changed */
			mutex_exit(&allocq->vpmq_mtx);
			goto retry_queue;
		}
		releq = vpmflp->vpm_releq;
		if (!mutex_tryenter(&releq->vpmq_mtx)) {
			/* cannot get releq; a free vpmap may be there now */
			mutex_exit(&allocq->vpmq_mtx);

			/*
			 * This loop could spin forever if this thread has
			 * higher priority than the thread that is holding
			 * releq->vpmq_mtx. In order to force the other thread
			 * to run, we'll lock/unlock the mutex which is safe
			 * since we just unlocked the allocq mutex.
			 */
			mutex_enter(&releq->vpmq_mtx);
			mutex_exit(&releq->vpmq_mtx);
			goto retry_queue;
		}
		if (releq->vpmq_free == NULL) {
			VPM_DEBUG(vpmd_emptyfreelist);
			/*
			 * This freelist is empty.
			 * This should not happen unless clients
			 * are failing to release the vpmap after
			 * accessing the data. Before resorting
			 * to sleeping, try the next list of the same color.
			 */
			free_ndx = (free_ndx + 1) & vpmd_freemsk;
			if (free_ndx != end_ndx) {
				mutex_exit(&releq->vpmq_mtx);
				mutex_exit(&allocq->vpmq_mtx);
				vpmflp = &vpmd_free[free_ndx];
				goto retry_queue;
			}
			/*
			 * Tried all freelists.
			 * wait on this list and hope something gets freed.
			 */
			vpmflp->vpm_want++;
			mutex_exit(&vpmflp->vpm_freeq[1].vpmq_mtx);
			cv_wait(&vpmflp->vpm_free_cv,
			    &vpmflp->vpm_freeq[0].vpmq_mtx);
			vpmflp->vpm_want--;
			mutex_exit(&vpmflp->vpm_freeq[0].vpmq_mtx);
			vpmflp = &vpmd_free[free_ndx];
			VPM_DEBUG(vpmd_nofreevpms);
			goto retry_queue;
		} else {
			/*
			 * Something on the rele queue; flip the alloc
			 * and rele queues and retry.
			 */
			vpmflp->vpm_allocq = releq;
			vpmflp->vpm_releq = allocq;
			mutex_exit(&allocq->vpmq_mtx);
			mutex_exit(&releq->vpmq_mtx);
			if (page_locked) {
				delay(hz >> 2);
				page_locked = 0;
			}
			goto retry_queue;
		}
	} else {
		int gotnewvpm;
		kmutex_t *pmtx;
		uint_t vpmref;

		/*
		 * Fastpath the case we get the vpmap mutex
		 * on the first try.
		 */
		first = vpm;
next_vpmap:
		vmtx = VPMAPMTX(vpm);
		if (!mutex_tryenter(vmtx)) {
			/*
			 * Another thread is trying to reclaim this slot.
			 * Skip to the next queue or vpmap.
			 */
			if ((vpm = vpm->vpm_next) == first) {
				goto skip_queue;
			} else {
				goto next_vpmap;
			}
		}

		/*
		 * Assign this vpm to the newpage.
		 */
		pmtx = PPMTX(newpage);
		gotnewvpm = 0;
		mutex_enter(pmtx);

		/*
		 * Check if some other thread already assigned a vpm to
		 * this page.
		 */
		if ((vpmref = newpage->p_vpmref) == 0) {
			newpage->p_vpmref = VPMID(vpm);
			gotnewvpm = 1;
		} else {
			VPM_DEBUG(vpmd_contend);
			mutex_exit(vmtx);
		}
		mutex_exit(pmtx);

		if (gotnewvpm) {

			/*
			 * At this point, we've selected the vpm. Remove vpm
			 * from its freelist. If vpm is the first one in
			 * the freelist, update the head of the freelist.
			 */
			if (first == vpm) {
				ASSERT(first == allocq->vpmq_free);
				allocq->vpmq_free = vpm->vpm_next;
			}

			/*
			 * If the head of the freelist still points to vpm,
			 * then there are no more free vpmaps in that list.
			 */
			if (allocq->vpmq_free == vpm)
				/*
				 * Took the last one
				 */
				allocq->vpmq_free = NULL;
			else {
				vpm->vpm_prev->vpm_next = vpm->vpm_next;
				vpm->vpm_next->vpm_prev = vpm->vpm_prev;
			}
			mutex_exit(&allocq->vpmq_mtx);
			vpm->vpm_prev = vpm->vpm_next = NULL;

			/*
			 * Disassociate the previous page.
			 * p_vpmref is used as a mapping reference to the page.
			 */
			if ((pp = vpm->vpm_pp) != NULL &&
			    vpm->vpm_vp == pp->p_vnode &&
			    vpm->vpm_off == pp->p_offset) {

				pmtx = PPMTX(pp);
				if (page_trylock(pp, SE_SHARED)) {
					/*
					 * Now verify that it is the correct
					 * page. If not someone else stole it,
					 * so just unlock it and leave.
					 */
					mutex_enter(pmtx);
					if (PP_ISFREE(pp) ||
					    vpm->vpm_vp != pp->p_vnode ||
					    vpm->vpm_off != pp->p_offset ||
					    pp->p_vpmref != VPMID(vpm)) {
						mutex_exit(pmtx);

						page_unlock(pp);
					} else {
						/*
						 * Release the page.
						 */
						pp->p_vpmref = 0;
						mutex_exit(pmtx);
						(void) page_release(pp, 1);
					}
				} else {
					/*
					 * If the page cannot be locked, just
					 * clear the p_vpmref and go.
					 */
					mutex_enter(pmtx);
					if (pp->p_vpmref == VPMID(vpm)) {
						pp->p_vpmref = 0;
					}
					mutex_exit(pmtx);
					VPM_DEBUG(vpmd_prevpagelocked);
				}
			}

			/*
			 * Setup vpm to point to the new page.
			 */
			vpm->vpm_pp = newpage;
			vpm->vpm_vp = newpage->p_vnode;
			vpm->vpm_off = newpage->p_offset;

		} else {
			int steal = !VPM_MTBF(steals, steals_mtbf);
			/*
			 * Page already has a vpm assigned, just use that.
			 * Grab the vpm mutex and verify that it is still
			 * the correct one. The pp->p_vpmref should not change
			 * once we have the vpm mutex and the page lock.
			 */
			mutex_exit(&allocq->vpmq_mtx);
			vpm = VPMP(vpmref);
			vmtx = VPMAPMTX(vpm);
			mutex_enter(vmtx);
			if ((steal && vpm->vpm_refcnt == 0) ||
			    vpm->vpm_pp != newpage) {
				/*
				 * The vpm got stolen, retry.
				 * clear the p_vpmref.
				 */
				pmtx = PPMTX(newpage);
				mutex_enter(pmtx);
				if (newpage->p_vpmref == vpmref) {
					newpage->p_vpmref = 0;
				}
				mutex_exit(pmtx);

				mutex_exit(vmtx);
				VPM_DEBUG(vpmd_steals);
				goto retry_queue;
			} else if (vpm->vpm_refcnt == 0) {
				/*
				 * Remove it from the free list if it
				 * exists there.
				 */
				VPMAP_RMFREELIST(vpm);
			}
		}
		return (vpm);
	}
}

static void
free_vpmap(struct vpmap *vpm)
{
	struct vpmfree *vpmflp;
	struct vpmap *vpmfreelist;
	union vpm_freeq *releq;

	ASSERT(MUTEX_HELD(VPMAPMTX(vpm)));

	if (vpm->vpm_refcnt != 0) {
		panic("free_vpmap");
		/*NOTREACHED*/
	}

	vpmflp = &vpmd_free[vpm->vpm_free_ndx];
	/*
	 * Add to the tail of the release queue
	 * Note that vpm_releq and vpm_allocq could toggle
	 * before we get the lock. This does not affect
	 * correctness as the 2 queues are only maintained
	 * to reduce lock pressure.
	 */
	releq = vpmflp->vpm_releq;
	if (releq == &vpmflp->vpm_freeq[0]) {
		vpm->vpm_ndxflg = 0;
	} else {
		vpm->vpm_ndxflg = 1;
	}
	mutex_enter(&releq->vpmq_mtx);
	vpmfreelist = releq->vpmq_free;
	if (vpmfreelist == 0) {
		int want;

		releq->vpmq_free = vpm->vpm_next = vpm->vpm_prev = vpm;
		/*
		 * Both queue mutexes are held to set vpm_want;
		 * snapshot the value before dropping releq mutex.
		 * If vpm_want appears after the releq mutex is dropped,
		 * then the vpmap just freed is already gone.
		 */
		want = vpmflp->vpm_want;
		mutex_exit(&releq->vpmq_mtx);
		/*
		 * See if there was a waiter before dropping the releq mutex
		 * then recheck after obtaining vpm_freeq[0] mutex as
		 * another thread may have already signaled.
		 */
		if (want) {
			mutex_enter(&vpmflp->vpm_freeq[0].vpmq_mtx);
			if (vpmflp->vpm_want)
				cv_signal(&vpmflp->vpm_free_cv);
			mutex_exit(&vpmflp->vpm_freeq[0].vpmq_mtx);
		}
	} else {
		vpm->vpm_next = vpmfreelist;
		vpm->vpm_prev = vpmfreelist->vpm_prev;
		vpmfreelist->vpm_prev = vpm;
		vpm->vpm_prev->vpm_next = vpm;
		mutex_exit(&releq->vpmq_mtx);
	}
}

/*
 * Get the vpmap for the page.
 * The refcnt of this vpm is incremented.
 */
static struct vpmap *
get_vpmap(page_t *pp)
{
	struct vpmap *vpm = NULL;
	kmutex_t *vmtx;
	kmutex_t *pmtx;
	unsigned int refid;

	ASSERT((pp != NULL) && PAGE_LOCKED(pp));

	if (VPM_MTBF(contend, contend_mtbf) && (refid = pp->p_vpmref) != 0) {
		vpm = VPMP(refid);
		vmtx = VPMAPMTX(vpm);
		mutex_enter(vmtx);
		/*
		 * Since we have the page lock and the vpm mutex, the
		 * pp->p_vpmref cannot change.
		 */
		if (vpm->vpm_pp != pp) {
			pmtx = PPMTX(pp);

			/*
			 * Clear the p_vpmref as it is incorrect.
			 * This can happen if the page was stolen.
			 * On x64 this should not happen as p_vpmref
			 * is treated as a mapping on the page. So
			 * if the page is stolen, the mapping would have
			 * been cleared in page_unload().
			 */
			mutex_enter(pmtx);
			if (pp->p_vpmref == refid)
				pp->p_vpmref = 0;
			mutex_exit(pmtx);

			mutex_exit(vmtx);
			vpm = NULL;
		} else if (vpm->vpm_refcnt == 0) {
			/*
			 * Got the vpm, remove it from the free
			 * list if it exists there.
			 */
			VPMAP_RMFREELIST(vpm);
		}
	}
	if (vpm == NULL) {
		/*
		 * get_free_vpmap() returns with the vpmap mutex held.
		 */
		vpm = get_free_vpmap(pp);
		vmtx = VPMAPMTX(vpm);
		vpmd_cpu[CPU->cpu_seqid].vcpu.vcpu_misses++;
	} else {
		vpmd_cpu[CPU->cpu_seqid].vcpu.vcpu_hits++;
	}

	vpm->vpm_refcnt++;
	mutex_exit(vmtx);

	return (vpm);
}

/* END --- vpm cache ---- */

/*
 * The vnode page mapping(vpm) interface routines.
 */
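/*
 * Note on the vmap_t scatter/gather lists built below: the array of
 * mappings returned by vpm_pagecreate()/vpm_map_pages() is terminated
 * by an entry with NULL vs_addr/vs_data. A caller would typically walk
 * it as in the following sketch (vml and rw being the caller's array
 * and access type):
 *
 *	for (i = 0; vml[i].vs_addr != NULL; i++) {
 *		... access PAGESIZE bytes at vml[i].vs_addr ...
 *	}
 *	vpm_unmap_pages(vml, rw);
 */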
/*
 * Find or create the pages starting from baseoff for specified
 * length 'len'.
 */
static int
vpm_pagecreate(
	struct vnode *vp,
	u_offset_t baseoff,
	size_t len,
	vmap_t vml[],
	int nseg,
	int *newpage)
{

	page_t *pp = NULL;
	caddr_t base;
	u_offset_t off = baseoff;
	int i;

	ASSERT(nseg >= MINVMAPS && nseg <= MAXVMAPS);

	for (i = 0; len > 0; len -= PAGESIZE, i++) {
		struct vpmap *vpm;


		if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL) {

			base = segkpm_create_va(off);

			/*
			 * the seg pointer passed in is just advisory. Just
			 * pass segkmap for now like segmap does with
			 * segmap_kpm enabled.
			 */
			if ((pp = page_create_va(vp, off, PAGESIZE, PG_WAIT,
			    segkmap, base)) == NULL) {
				panic("segmap_pagecreate_vpm: "
				    "page_create failed");
				/*NOTREACHED*/
			}
			if (newpage != NULL)
				*newpage = 1;

			page_io_unlock(pp);
		}

		/*
		 * Get the vpm for this page_t.
		 */
		if (vpm_cache_enable) {
			vpm = get_vpmap(pp);
			vml[i].vs_data = (void *)&vpm->vpm_pp;
		} else {
			vml[i].vs_data = (void *)pp;
			pp->p_vpmref = 0;
		}

		vml[i].vs_addr = hat_kpm_mapin(pp, 0);
		vml[i].vs_len = PAGESIZE;

		off += PAGESIZE;
	}
	vml[i].vs_data = NULL;
	vml[i].vs_addr = (caddr_t)NULL;
	return (0);
}


/*
 * Returns vpm mappings of pages in the range [off, off+len], where
 * len is rounded up to the PAGESIZE boundary. The list of pages and
 * the page addresses are returned in the SGL vml (vmap_t) array passed in.
 * The nseg is the number of vmap_t entries in the array.
 *
 * The segmap's SM_LOCKPROTO usage is not supported by these interfaces.
 * For such cases, use the seg_map interfaces.
 */
int
vpm_map_pages(
	struct vnode *vp,
	u_offset_t off,
	size_t len,
	int fetchpage,
	vmap_t *vml,
	int nseg,
	int *newpage,
	enum seg_rw rw)
{
	extern struct vnode *common_specvp();
	u_offset_t baseoff;
	uint_t prot;
	caddr_t base;
	page_t *pp, *pplist[MAXVMAPS];
	struct vpmap *vpm;
	int i, error = 0;
	size_t tlen;

	ASSERT(nseg >= MINVMAPS && nseg <= MAXVMAPS);
	baseoff = off & (offset_t)PAGEMASK;
	vml[0].vs_data = NULL;
	vml[0].vs_addr = (caddr_t)NULL;

	tlen = P2ROUNDUP(off + len, PAGESIZE) - baseoff;
	/*
	 * Restrict it to VPMMAXLEN.
	 */
	if (tlen > (VPMMAXPGS * PAGESIZE)) {
		tlen = VPMMAXPGS * PAGESIZE;
	}
	/*
	 * Ensure length fits within the vml[] array. One element of
	 * the array is used to mark the end of the scatter/gather list
	 * of valid mappings by setting its vs_addr = NULL. Leave space
	 * for this element.
	 */
	if (tlen > ((nseg - 1) * PAGESIZE)) {
		tlen = ((nseg - 1) * PAGESIZE);
	}
	len = tlen;

	/*
	 * If this is a block device we have to be sure to use the
	 * "common" block device vnode for the mapping.
	 */
	if (vp->v_type == VBLK)
		vp = common_specvp(vp);


	if (!fetchpage)
		return (vpm_pagecreate(vp, baseoff, len, vml, nseg, newpage));

	/*
	 * The loop-update clause below keeps pplist[] NULL-terminated
	 * as entries are added.
	 */
	for (i = 0; len > 0; len -= PAGESIZE, i++, pplist[i] = NULL) {

		pp = page_lookup(vp, baseoff, SE_SHARED);

		/*
		 * If we did not find the page or if this page was not
		 * in the vpm cache(p_vpmref == 0), then let VOP_GETPAGE get
		 * all the pages.
		 * We need to call VOP_GETPAGE so that filesystems can do the
		 * necessary tracking for sequential access.
		 */

		if (pp == NULL || (vpm_cache_enable && pp->p_vpmref == 0) ||
		    (rw == S_WRITE && hat_page_getattr(pp, P_MOD | P_REF)
		    != (P_MOD | P_REF))) {
			int j;
			if (pp != NULL) {
				page_unlock(pp);
			}
			/*
			 * If we did not find the desired set of pages in
			 * the page cache, just call VOP_GETPAGE to get
			 * all the pages.
			 */
			for (j = 0; j < i; j++) {
				page_unlock(pplist[j]);
			}


			baseoff = off & (offset_t)PAGEMASK;
			/*
			 * Pass a dummy address as it will be required
			 * by page_create_va(). We pass segkmap as the seg
			 * as some file systems(UFS) check it.
			 */
			base = segkpm_create_va(baseoff);

			error = VOP_GETPAGE(vp, baseoff, tlen, &prot, pplist,
			    tlen, segkmap, base, rw, CRED(), NULL);
			if (error) {
				VPM_DEBUG(vpmd_getpagefailed);
				pplist[0] = NULL;
			}
			break;
		} else {
			pplist[i] = pp;
			baseoff += PAGESIZE;
		}
	}

	if (error) {
		for (i = 0; pplist[i] != NULL; i++) {
			page_unlock(pplist[i]);
			pplist[i] = NULL;
		}
		vml[0].vs_addr = NULL;
		vml[0].vs_data = NULL;
		return (error);
	}

	/*
	 * Get the vpm's for pages.
	 */
	for (i = 0; pplist[i] != NULL; i++) {
		if (vpm_cache_enable) {
			vpm = get_vpmap(pplist[i]);
			vml[i].vs_data = (void *)&(vpm->vpm_pp);
		} else {
			vml[i].vs_data = (void *)pplist[i];
			pplist[i]->p_vpmref = 0;
		}

		vml[i].vs_addr = hat_kpm_mapin(pplist[i], 0);
		vml[i].vs_len = PAGESIZE;
	}

	vml[i].vs_data = NULL;
	vml[i].vs_addr = (caddr_t)NULL;

	return (0);
}

/*
 * Release the vpm mappings on the pages and unlock them.
 */
void
vpm_unmap_pages(vmap_t vml[], enum seg_rw rw)
{
	int i;
	struct vpmap *vpm;
	kmutex_t *mtx;
	page_t *pp;

	for (i = 0; vml[i].vs_data != NULL; i++) {
		ASSERT(IS_KPM_ADDR(vml[i].vs_addr));

		if (vpm_cache_enable) {
			pp = *(((page_t **)vml[i].vs_data));
		} else {
			pp = (page_t *)vml[i].vs_data;
		}

		/*
		 * Mark page as being modified or referenced, because vpm
		 * pages would not cause faults where it would be set normally.
		 */
		if (rw == S_WRITE) {
			hat_setrefmod(pp);
		} else {
			ASSERT(rw == S_READ);
			hat_setref(pp);
		}

		if (vpm_cache_enable) {
			vpm = (struct vpmap *)((char *)vml[i].vs_data
			    - offsetof(struct vpmap, vpm_pp));
			hat_kpm_mapout(pp, 0, vml[i].vs_addr);
			page_unlock(pp);
			mtx = VPMAPMTX(vpm);
			mutex_enter(mtx);

			if (--vpm->vpm_refcnt == 0) {
				free_vpmap(vpm);
			}
			mutex_exit(mtx);
		} else {
			hat_kpm_mapout(pp, 0, vml[i].vs_addr);
			(void) page_release(pp, 1);
		}
		vml[i].vs_data = NULL;
		vml[i].vs_addr = NULL;
	}
}

/*
 * Given the vp, off and the uio structure, this routine will do the
 * copy (uiomove). If the last page created is partially written,
 * the rest of the page is zeroed out. It also zeroes the beginning of
 * the first page up to the start offset if requested (zerostart).
 * If pages are to be fetched, it will call the filesystem's getpage
 * function (VOP_GETPAGE) to get them, otherwise they will be created if
 * not already present in the page cache.
 */
int
vpm_data_copy(struct vnode *vp,
	u_offset_t off,
	size_t len,
	struct uio *uio,
	int fetchpage,
	int *newpage,
	int zerostart,
	enum seg_rw rw)
{
	int error;
	struct vmap vml[MINVMAPS];
	enum uio_rw uiorw;
	int npages = 0;

	uiorw = (rw == S_WRITE) ? UIO_WRITE : UIO_READ;
	/*
	 * 'off' will be the offset where the I/O starts.
	 * We get the pages starting at the (off & PAGEMASK)
	 * page boundary.
	 */
	error = vpm_map_pages(vp, off, (uint_t)len,
	    fetchpage, vml, MINVMAPS, &npages, rw);

	if (newpage != NULL)
		*newpage = npages;
	if (!error) {
		int i, pn, slen = len;
		int pon = off & PAGEOFFSET;

		/*
		 * Clear from the beginning of the page to start offset
		 * if requested.
		 */
		if (!fetchpage && zerostart) {
			(void) kzero(vml[0].vs_addr, (uint_t)pon);
			VPM_DEBUG(vpmd_zerostart);
		}

		for (i = 0; !error && slen > 0 &&
		    vml[i].vs_addr != NULL; i++) {
			pn = (int)MIN(slen, (PAGESIZE - pon));
			error = uiomove(vml[i].vs_addr + pon,
			    (long)pn, uiorw, uio);
			slen -= pn;
			pon = 0;
		}

		/*
		 * When new pages are created, zero out part of the
		 * page we did not copy to.
		 */
		if (!fetchpage && npages &&
		    uio->uio_loffset < roundup(off + len, PAGESIZE)) {
			int nzero;

			pon = (uio->uio_loffset & PAGEOFFSET);
			nzero = PAGESIZE - pon;
			i = (uio->uio_loffset - (off & PAGEMASK)) / PAGESIZE;
			(void) kzero(vml[i].vs_addr + pon, (uint_t)nzero);
		}
		vpm_unmap_pages(vml, rw);
	}
	return (error);
}
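
/*
 * Sketch of a typical vpm_data_copy() caller, modeled loosely on how
 * file systems such as UFS read file data (vp, off, n and uio are the
 * caller's; this is illustrative, not a verbatim excerpt):
 *
 *	if (vpm_enable) {
 *		error = vpm_data_copy(vp, off, n, uio, 1, NULL, 0,
 *		    S_READ);
 *	} else {
 *		caddr_t base = segmap_getmapflt(segkmap, vp, off, n,
 *		    1, S_READ);
 *		error = uiomove(base + (off & MAXBOFFSET), n,
 *		    UIO_READ, uio);
 *		(void) segmap_release(segkmap, base, 0);
 *	}
 */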

/*
 * Called to flush pages for the given vnode covering
 * [off, off+len] range.
 */
int
vpm_sync_pages(struct vnode *vp,
	u_offset_t off,
	size_t len,
	uint_t flags)
{
	extern struct vnode *common_specvp();
	int bflags = 0;
	int error = 0;
	size_t psize = roundup(len, PAGESIZE);

	/*
	 * If this is a block device we have to be sure to use the
	 * "common" block device vnode for the mapping.
	 */
	if (vp->v_type == VBLK)
		vp = common_specvp(vp);

	if ((flags & ~SM_DONTNEED) != 0) {
		if (flags & SM_ASYNC)
			bflags |= B_ASYNC;
		if (flags & SM_INVAL)
			bflags |= B_INVAL;
		if (flags & SM_DESTROY)
			bflags |= (B_INVAL|B_TRUNC);
		if (flags & SM_FREE)
			bflags |= B_FREE;
		if (flags & SM_DONTNEED)
			bflags |= B_DONTNEED;

		error = VOP_PUTPAGE(vp, off, psize, bflags, CRED(), NULL);
	}

	return (error);
}


#else	/* SEGKPM_SUPPORT */

/* vpm stubs */
void
vpm_init()
{
}

/*ARGSUSED*/
int
vpm_pagecreate(
	struct vnode *vp,
	u_offset_t baseoff,
	size_t len,
	vmap_t vml[],
	int nseg,
	int *newpage)
{
	return (0);
}

/*ARGSUSED*/
int
vpm_map_pages(
	struct vnode *vp,
	u_offset_t off,
	size_t len,
	int fetchpage,
	vmap_t vml[],
	int nseg,
	int *newpage,
	enum seg_rw rw)
{
	return (0);
}

/*ARGSUSED*/
int
vpm_data_copy(struct vnode *vp,
	u_offset_t off,
	size_t len,
	struct uio *uio,
	int fetchpage,
	int *newpage,
	int zerostart,
	enum seg_rw rw)
{
	return (0);
}

/*ARGSUSED*/
void
vpm_unmap_pages(vmap_t vml[], enum seg_rw rw)
{
}
/*ARGSUSED*/
int
vpm_sync_pages(struct vnode *vp,
	u_offset_t off,
	size_t len,
	uint_t flags)
{
	return (0);
}
#endif	/* SEGKPM_SUPPORT */