11841Spraks /*
21841Spraks * CDDL HEADER START
31841Spraks *
41841Spraks * The contents of this file are subject to the terms of the
51841Spraks * Common Development and Distribution License (the "License").
61841Spraks * You may not use this file except in compliance with the License.
71841Spraks *
81841Spraks * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
91841Spraks * or http://www.opensolaris.org/os/licensing.
101841Spraks * See the License for the specific language governing permissions
111841Spraks * and limitations under the License.
121841Spraks *
131841Spraks * When distributing Covered Code, include this CDDL HEADER in each
141841Spraks * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
151841Spraks * If applicable, add the following below this CDDL HEADER, with the
161841Spraks * fields enclosed by brackets "[]" replaced with your own identifying
171841Spraks * information: Portions Copyright [yyyy] [name of copyright owner]
181841Spraks *
191841Spraks * CDDL HEADER END
201841Spraks */
211841Spraks /*
22*9281SPrakash.Sangappa@Sun.COM * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
231841Spraks * Use is subject to license terms.
241841Spraks */
251841Spraks
261841Spraks
271841Spraks /*
281841Spraks * VM - generic vnode page mapping interfaces.
291841Spraks *
301841Spraks * Mechanism to provide temporary mappings to vnode pages.
311841Spraks * The typical use would be to copy/access file data.
321841Spraks */
331841Spraks
341841Spraks #include <sys/types.h>
351841Spraks #include <sys/t_lock.h>
361841Spraks #include <sys/param.h>
371841Spraks #include <sys/sysmacros.h>
381841Spraks #include <sys/buf.h>
391841Spraks #include <sys/systm.h>
401841Spraks #include <sys/vnode.h>
411841Spraks #include <sys/mman.h>
421841Spraks #include <sys/errno.h>
431841Spraks #include <sys/cred.h>
441841Spraks #include <sys/kmem.h>
451841Spraks #include <sys/vtrace.h>
461841Spraks #include <sys/cmn_err.h>
471841Spraks #include <sys/debug.h>
481841Spraks #include <sys/thread.h>
491841Spraks #include <sys/dumphdr.h>
501841Spraks #include <sys/bitmap.h>
511841Spraks #include <sys/lgrp.h>
521841Spraks
531841Spraks #include <vm/seg_kmem.h>
541841Spraks #include <vm/hat.h>
551841Spraks #include <vm/as.h>
561841Spraks #include <vm/seg.h>
571841Spraks #include <vm/seg_kpm.h>
581841Spraks #include <vm/seg_map.h>
591841Spraks #include <vm/page.h>
601841Spraks #include <vm/pvn.h>
611841Spraks #include <vm/rm.h>
621841Spraks #include <vm/vpm.h>
631841Spraks
64*9281SPrakash.Sangappa@Sun.COM
#ifdef	SEGKPM_SUPPORT
/*
 * Master switch for the vnode page mapping (VPM) interfaces.
 *
 * VPM can be disabled by setting vpm_enable = 0 in
 * /etc/system.
 */
int	vpm_enable = 1;

#else

/* VPM defaults to disabled when SEGKPM_SUPPORT is not defined. */
int	vpm_enable = 0;

#endif
78*9281SPrakash.Sangappa@Sun.COM
791841Spraks #ifdef SEGKPM_SUPPORT
801841Spraks
811841Spraks
int	vpm_cache_enable = 1;	/* 0 turns the vpm cache off */
long	vpm_cache_percent = 12;	/* percentage of physmem used to size cache */
long	vpm_cache_size;		/* cache size in bytes, computed by vpm_init() */
int	vpm_nfreelist = 0;	/* number of freelists; 0 selects max_ncpus */
int	vpmd_freemsk = 0;	/* vpm_nfreelist - 1, used as a freelist mask */
871841Spraks
881841Spraks #define VPM_S_PAD 64
891841Spraks union vpm_cpu {
901841Spraks struct {
911841Spraks int vcpu_free_ndx;
921841Spraks ulong_t vcpu_hits;
931841Spraks ulong_t vcpu_misses;
941841Spraks } vcpu;
951841Spraks char vpm_pad[VPM_S_PAD];
961841Spraks };
971841Spraks static union vpm_cpu *vpmd_cpu;
981841Spraks
991841Spraks #define vfree_ndx vcpu.vcpu_free_ndx
1001841Spraks
1011841Spraks int vpm_cachemode = VPMCACHE_LRU;
1021841Spraks
1031841Spraks #define PPMTX(pp) (&(pp)->p_ilock)
1041841Spraks
1051841Spraks static struct vpmap *vpmd_vpmap; /* list of vpmap structs preallocated */
1061841Spraks static struct vpmfree *vpmd_free;
1071841Spraks #define VPMAPMTX(vpm) (&vpm->vpm_mtx)
1081841Spraks #define VPMAP2VMF(vpm) (&vpmd_free[(vpm - vpmd_vpmap) & vpmd_freemsk])
1091841Spraks #define VPMAP2VMF_NDX(vpm) (ushort_t)((vpm - vpmd_vpmap) & vpmd_freemsk)
1101841Spraks #define VPMP(id) (&vpmd_vpmap[id - 1])
1111841Spraks #define VPMID(vpm) (uint_t)((vpm - vpmd_vpmap) + 1)
1121841Spraks
1131841Spraks
#ifdef	DEBUG

/* Event counters, bumped through VPM_DEBUG(). */
struct vpm_debug {
	int vpmd_steals;
	int vpmd_contend;
	int vpmd_prevpagelocked;
	int vpmd_getpagefailed;
	int vpmd_zerostart;
	int vpmd_emptyfreelist;
	int vpmd_nofreevpms;
} vpm_debug;

#define	VPM_DEBUG(x)	((vpm_debug.x)++)

int	steals;
int	steals_mtbf = 7;
int	contend;
int	contend_mtbf = 127;

/*
 * Increments the counter v and evaluates to 0 only when all bits of the
 * mask f are set in the new count, i.e. it evaluates to 1 except on
 * every (f + 1)th call for a mask of the form 2^n - 1.
 */
#define	VPM_MTBF(v, f)	(((++(v)) & (f)) != (f))

#else	/* DEBUG */

/* Non-DEBUG: VPM_MTBF() is always true and VPM_DEBUG() expands to nothing. */
#define	VPM_MTBF(v, f)	(1)
#define	VPM_DEBUG(x)	/* nothing */

#endif
1411841Spraks
/*
 * The vpm cache.
 *
 * The main purpose of having a cache here is to speed up page_lookup()
 * operations and also provide an LRU (default) behaviour of file pages. The
 * page_lookup() operation tends to be expensive if a page has to be
 * reclaimed from the system page cache ("cachelist"). Once we speed up the
 * page_lookup()->page_reclaim() path then there should be no need for
 * this cache. The system page cache (cachelist) should effectively serve the
 * purpose of caching file pages.
 *
 * This cache is very similar to segmap's smap cache. Each page in the
 * cache is tracked by the structure vpmap_t. But unlike segmap, there is no
 * hash table. The page_t has a reference to the vpmap_t when cached. For a
 * given vnode and offset, the page is found by means of a page_lookup()
 * operation. Any page which has a mapping (i.e. when cached) will not be in
 * the system 'cachelist'. Hence the page_lookup() will not have to do a
 * page_reclaim(). That is how the cache serves to speed up page_lookup()
 * operations.
 *
 * This cache can be disabled by setting vpm_cache_enable = 0 in /etc/system.
 */
1641841Spraks
1651841Spraks void
vpm_init()1661841Spraks vpm_init()
1671841Spraks {
1681841Spraks long npages;
1691841Spraks struct vpmap *vpm;
1701841Spraks struct vpmfree *vpmflp;
1711841Spraks int i, ndx;
1721841Spraks extern void prefetch_smap_w(void *);
1731841Spraks
174*9281SPrakash.Sangappa@Sun.COM if (!kpm_enable) {
175*9281SPrakash.Sangappa@Sun.COM vpm_enable = 0;
176*9281SPrakash.Sangappa@Sun.COM }
177*9281SPrakash.Sangappa@Sun.COM
178*9281SPrakash.Sangappa@Sun.COM if (!vpm_enable || !vpm_cache_enable) {
1791841Spraks return;
1801841Spraks }
1811841Spraks
1821841Spraks /*
1831841Spraks * Set the size of the cache.
1841841Spraks */
1851841Spraks vpm_cache_size = mmu_ptob((physmem * vpm_cache_percent)/100);
1861841Spraks if (vpm_cache_size < VPMAP_MINCACHE) {
1871841Spraks vpm_cache_size = VPMAP_MINCACHE;
1881841Spraks }
1891841Spraks
190*9281SPrakash.Sangappa@Sun.COM if (vpm_cache_size > VPMAP_MAXCACHE) {
191*9281SPrakash.Sangappa@Sun.COM vpm_cache_size = VPMAP_MAXCACHE;
192*9281SPrakash.Sangappa@Sun.COM }
193*9281SPrakash.Sangappa@Sun.COM
1941841Spraks /*
1951841Spraks * Number of freelists.
1961841Spraks */
1971841Spraks if (vpm_nfreelist == 0) {
1981841Spraks vpm_nfreelist = max_ncpus;
1991841Spraks } else if (vpm_nfreelist < 0 || vpm_nfreelist > 2 * max_ncpus) {
2001841Spraks cmn_err(CE_WARN, "vpmap create : number of freelist "
2011841Spraks "vpm_nfreelist %d using %d", vpm_nfreelist, max_ncpus);
2021841Spraks vpm_nfreelist = 2 * max_ncpus;
2031841Spraks }
2041841Spraks
2051841Spraks /*
2061841Spraks * Round it up to the next power of 2
2071841Spraks */
2081841Spraks if (vpm_nfreelist & (vpm_nfreelist - 1)) {
2091841Spraks vpm_nfreelist = 1 << (highbit(vpm_nfreelist));
2101841Spraks }
2111841Spraks vpmd_freemsk = vpm_nfreelist - 1;
2121841Spraks
2131841Spraks /*
2141841Spraks * Use a per cpu rotor index to spread the allocations evenly
2151841Spraks * across the available vpm freelists.
2161841Spraks */
2171841Spraks vpmd_cpu = kmem_zalloc(sizeof (union vpm_cpu) * max_ncpus, KM_SLEEP);
2181841Spraks ndx = 0;
2191841Spraks for (i = 0; i < max_ncpus; i++) {
2201841Spraks
2211841Spraks vpmd_cpu[i].vfree_ndx = ndx;
2221841Spraks ndx = (ndx + 1) & vpmd_freemsk;
2231841Spraks }
2241841Spraks
2251841Spraks /*
2261841Spraks * Allocate and initialize the freelist.
2271841Spraks */
2281841Spraks vpmd_free = kmem_zalloc(vpm_nfreelist * sizeof (struct vpmfree),
229*9281SPrakash.Sangappa@Sun.COM KM_SLEEP);
2301841Spraks for (i = 0; i < vpm_nfreelist; i++) {
2311841Spraks
2321841Spraks vpmflp = &vpmd_free[i];
2331841Spraks /*
2341841Spraks * Set up initial queue pointers. They will get flipped
2351841Spraks * back and forth.
2361841Spraks */
2371841Spraks vpmflp->vpm_allocq = &vpmflp->vpm_freeq[VPMALLOCQ];
2381841Spraks vpmflp->vpm_releq = &vpmflp->vpm_freeq[VPMRELEQ];
2391841Spraks }
2401841Spraks
2411841Spraks npages = mmu_btop(vpm_cache_size);
2421841Spraks
2431841Spraks
2441841Spraks /*
245*9281SPrakash.Sangappa@Sun.COM * Allocate and initialize the vpmap structs. We need to
246*9281SPrakash.Sangappa@Sun.COM * walk the array backwards as the prefetch happens in reverse
247*9281SPrakash.Sangappa@Sun.COM * order.
2481841Spraks */
249*9281SPrakash.Sangappa@Sun.COM vpmd_vpmap = kmem_alloc(sizeof (struct vpmap) * npages, KM_SLEEP);
250*9281SPrakash.Sangappa@Sun.COM for (vpm = &vpmd_vpmap[npages - 1]; vpm >= vpmd_vpmap; vpm--) {
2511841Spraks struct vpmfree *vpmflp;
2521841Spraks union vpm_freeq *releq;
2531841Spraks struct vpmap *vpmapf;
2541841Spraks
2551841Spraks /*
2561841Spraks * Use prefetch as we have to walk thru a large number of
2571841Spraks * these data structures. We just use the smap's prefetch
258*9281SPrakash.Sangappa@Sun.COM * routine as it does the same.
2591841Spraks */
2601841Spraks prefetch_smap_w((void *)vpm);
2611841Spraks
262*9281SPrakash.Sangappa@Sun.COM vpm->vpm_vp = NULL;
263*9281SPrakash.Sangappa@Sun.COM vpm->vpm_off = 0;
264*9281SPrakash.Sangappa@Sun.COM vpm->vpm_pp = NULL;
265*9281SPrakash.Sangappa@Sun.COM vpm->vpm_refcnt = 0;
266*9281SPrakash.Sangappa@Sun.COM mutex_init(&vpm->vpm_mtx, NULL, MUTEX_DEFAULT, NULL);
2671841Spraks vpm->vpm_free_ndx = VPMAP2VMF_NDX(vpm);
2681841Spraks
2691841Spraks vpmflp = VPMAP2VMF(vpm);
2701841Spraks releq = vpmflp->vpm_releq;
2711841Spraks
2721841Spraks vpmapf = releq->vpmq_free;
2731841Spraks if (vpmapf == NULL) {
2741841Spraks releq->vpmq_free = vpm->vpm_next = vpm->vpm_prev = vpm;
2751841Spraks } else {
2761841Spraks vpm->vpm_next = vpmapf;
2771841Spraks vpm->vpm_prev = vpmapf->vpm_prev;
2781841Spraks vpmapf->vpm_prev = vpm;
2791841Spraks vpm->vpm_prev->vpm_next = vpm;
2801841Spraks releq->vpmq_free = vpm->vpm_next;
2811841Spraks }
2821841Spraks
2831841Spraks /*
2841841Spraks * Indicate that the vpmap is on the releq at start
2851841Spraks */
2861841Spraks vpm->vpm_ndxflg = VPMRELEQ;
2871841Spraks }
2881841Spraks }
2891841Spraks
2901841Spraks
/*
 * Unhooks vpm from the freelist if it is still on the freelist.
 *
 * A vpmap is on a freelist when its vpm_next is non-NULL; vpm_free_ndx and
 * vpm_ndxflg identify the freelist and the queue within it.  The queue mutex
 * is taken around the unlink, and vpm_next/vpm_prev are reset to NULL
 * afterwards.  Written as a single statement so that it can be used safely
 * in unbraced if/else bodies.
 */
#define	VPMAP_RMFREELIST(vpm)						\
	do {								\
		if ((vpm)->vpm_next != NULL) {				\
			union vpm_freeq *rm_freeq;			\
			struct vpmfree *rm_vpmflp;			\
			rm_vpmflp = &vpmd_free[(vpm)->vpm_free_ndx];	\
			rm_freeq =					\
			    &rm_vpmflp->vpm_freeq[(vpm)->vpm_ndxflg];	\
			mutex_enter(&rm_freeq->vpmq_mtx);		\
			if (rm_freeq->vpmq_free != (vpm)) {		\
				(vpm)->vpm_prev->vpm_next =		\
				    (vpm)->vpm_next;			\
				(vpm)->vpm_next->vpm_prev =		\
				    (vpm)->vpm_prev;			\
			} else if ((vpm) == (vpm)->vpm_next) {		\
				rm_freeq->vpmq_free = NULL;		\
			} else {					\
				rm_freeq->vpmq_free = (vpm)->vpm_next;	\
				(vpm)->vpm_prev->vpm_next =		\
				    (vpm)->vpm_next;			\
				(vpm)->vpm_next->vpm_prev =		\
				    (vpm)->vpm_prev;			\
			}						\
			mutex_exit(&rm_freeq->vpmq_mtx);		\
			(vpm)->vpm_next = (vpm)->vpm_prev = NULL;	\
		}							\
	} while (0)
3161841Spraks
3171841Spraks static int
get_freelndx(int mode)3181841Spraks get_freelndx(int mode)
3191841Spraks {
3201841Spraks int ndx;
3211841Spraks
3221841Spraks ndx = vpmd_cpu[CPU->cpu_seqid].vfree_ndx & vpmd_freemsk;
3231841Spraks switch (mode) {
3241841Spraks
3251841Spraks case VPMCACHE_LRU:
3261841Spraks default:
3271841Spraks vpmd_cpu[CPU->cpu_seqid].vfree_ndx++;
3281841Spraks break;
3291841Spraks }
3301841Spraks return (ndx);
3311841Spraks }
3321841Spraks
3331841Spraks
3341841Spraks /*
3351841Spraks * Find one vpmap structure from the free lists and use it for the newpage.
3361841Spraks * The previous page it cached is dissociated and released. The page_t's
3371841Spraks * p_vpmref is cleared only when the vpm it is pointing to is locked(or
3381841Spraks * for AMD64 when the page is exclusively locked in page_unload. That is
3391841Spraks * because the p_vpmref is treated as mapping).
3401841Spraks *
3411841Spraks * The page's p_vpmref is set when the page is
3421841Spraks * locked(at least SHARED locked).
3431841Spraks */
3441841Spraks static struct vpmap *
get_free_vpmap(page_t * newpage)3451841Spraks get_free_vpmap(page_t *newpage)
3461841Spraks {
3471841Spraks struct vpmfree *vpmflp;
3481841Spraks kmutex_t *vmtx;
3491841Spraks struct vpmap *vpm, *first;
3501841Spraks union vpm_freeq *allocq, *releq;
3511841Spraks page_t *pp = NULL;
3521841Spraks int end_ndx, page_locked = 0;
3531841Spraks int free_ndx;
3541841Spraks
3551841Spraks /*
3561841Spraks * get the freelist bin index.
3571841Spraks */
3581841Spraks free_ndx = get_freelndx(vpm_cachemode);
3591841Spraks
3601841Spraks end_ndx = free_ndx;
3611841Spraks vpmflp = &vpmd_free[free_ndx];
3621841Spraks
3631841Spraks retry_queue:
3641841Spraks allocq = vpmflp->vpm_allocq;
3651841Spraks mutex_enter(&allocq->vpmq_mtx);
3661841Spraks
3671841Spraks if ((vpm = allocq->vpmq_free) == NULL) {
3681841Spraks
3691841Spraks skip_queue:
3701841Spraks /*
3711841Spraks * The alloc list is empty or this queue is being skipped;
3721841Spraks * first see if the allocq toggled.
3731841Spraks */
3741841Spraks if (vpmflp->vpm_allocq != allocq) {
3751841Spraks /* queue changed */
3761841Spraks mutex_exit(&allocq->vpmq_mtx);
3771841Spraks goto retry_queue;
3781841Spraks }
3791841Spraks releq = vpmflp->vpm_releq;
3801841Spraks if (!mutex_tryenter(&releq->vpmq_mtx)) {
3811841Spraks /* cannot get releq; a free vpmap may be there now */
3821841Spraks mutex_exit(&allocq->vpmq_mtx);
3831841Spraks
3841841Spraks /*
3851841Spraks * This loop could spin forever if this thread has
3861841Spraks * higher priority than the thread that is holding
3871841Spraks * releq->vpmq_mtx. In order to force the other thread
3881841Spraks * to run, we'll lock/unlock the mutex which is safe
3891841Spraks * since we just unlocked the allocq mutex.
3901841Spraks */
3911841Spraks mutex_enter(&releq->vpmq_mtx);
3921841Spraks mutex_exit(&releq->vpmq_mtx);
3931841Spraks goto retry_queue;
3941841Spraks }
3951841Spraks if (releq->vpmq_free == NULL) {
3961841Spraks VPM_DEBUG(vpmd_emptyfreelist);
3971841Spraks /*
3981841Spraks * This freelist is empty.
3991841Spraks * This should not happen unless clients
4001841Spraks * are failing to release the vpmap after
4011841Spraks * accessing the data. Before resorting
4021841Spraks * to sleeping, try the next list of the same color.
4031841Spraks */
4041841Spraks free_ndx = (free_ndx + 1) & vpmd_freemsk;
4051841Spraks if (free_ndx != end_ndx) {
4061841Spraks mutex_exit(&releq->vpmq_mtx);
4071841Spraks mutex_exit(&allocq->vpmq_mtx);
4081841Spraks vpmflp = &vpmd_free[free_ndx];
4091841Spraks goto retry_queue;
4101841Spraks }
4111841Spraks /*
4121841Spraks * Tried all freelists.
4131841Spraks * wait on this list and hope something gets freed.
4141841Spraks */
4151841Spraks vpmflp->vpm_want++;
4161841Spraks mutex_exit(&vpmflp->vpm_freeq[1].vpmq_mtx);
4171841Spraks cv_wait(&vpmflp->vpm_free_cv,
418*9281SPrakash.Sangappa@Sun.COM &vpmflp->vpm_freeq[0].vpmq_mtx);
4191841Spraks vpmflp->vpm_want--;
4201841Spraks mutex_exit(&vpmflp->vpm_freeq[0].vpmq_mtx);
4211841Spraks vpmflp = &vpmd_free[free_ndx];
4221841Spraks VPM_DEBUG(vpmd_nofreevpms);
4231841Spraks goto retry_queue;
4241841Spraks } else {
4251841Spraks /*
4261841Spraks * Something on the rele queue; flip the alloc
4271841Spraks * and rele queues and retry.
4281841Spraks */
4291841Spraks vpmflp->vpm_allocq = releq;
4301841Spraks vpmflp->vpm_releq = allocq;
4311841Spraks mutex_exit(&allocq->vpmq_mtx);
4321841Spraks mutex_exit(&releq->vpmq_mtx);
4331841Spraks if (page_locked) {
4341841Spraks delay(hz >> 2);
4351841Spraks page_locked = 0;
4361841Spraks }
4371841Spraks goto retry_queue;
4381841Spraks }
4391841Spraks } else {
4401841Spraks int gotnewvpm;
4411841Spraks kmutex_t *pmtx;
4421841Spraks uint_t vpmref;
4431841Spraks
4441841Spraks /*
4451841Spraks * Fastpath the case we get the vpmap mutex
4461841Spraks * on the first try.
4471841Spraks */
4481841Spraks first = vpm;
4491841Spraks next_vpmap:
4501841Spraks vmtx = VPMAPMTX(vpm);
4511841Spraks if (!mutex_tryenter(vmtx)) {
4521841Spraks /*
4531841Spraks * Another thread is trying to reclaim this slot.
4541841Spraks * Skip to the next queue or vpmap.
4551841Spraks */
4561841Spraks if ((vpm = vpm->vpm_next) == first) {
4571841Spraks goto skip_queue;
4581841Spraks } else {
4591841Spraks goto next_vpmap;
4601841Spraks }
4611841Spraks }
4621841Spraks
4631841Spraks /*
4641841Spraks * Assign this vpm to the newpage.
4651841Spraks */
4661841Spraks pmtx = PPMTX(newpage);
4671841Spraks gotnewvpm = 0;
4681841Spraks mutex_enter(pmtx);
4691841Spraks
4701841Spraks /*
4711841Spraks * Check if some other thread already assigned a vpm to
4721841Spraks * this page.
4731841Spraks */
4741841Spraks if ((vpmref = newpage->p_vpmref) == 0) {
4751841Spraks newpage->p_vpmref = VPMID(vpm);
4761841Spraks gotnewvpm = 1;
4771841Spraks } else {
4781841Spraks VPM_DEBUG(vpmd_contend);
4791841Spraks mutex_exit(vmtx);
4801841Spraks }
4811841Spraks mutex_exit(pmtx);
4821841Spraks
4831841Spraks if (gotnewvpm) {
4841841Spraks
4851841Spraks /*
4861841Spraks * At this point, we've selected the vpm. Remove vpm
4871841Spraks * from its freelist. If vpm is the first one in
4881841Spraks * the freelist, update the head of the freelist.
4891841Spraks */
4901841Spraks if (first == vpm) {
4911841Spraks ASSERT(first == allocq->vpmq_free);
4921841Spraks allocq->vpmq_free = vpm->vpm_next;
4931841Spraks }
4941841Spraks
4951841Spraks /*
4961841Spraks * If the head of the freelist still points to vpm,
4971841Spraks * then there are no more free vpmaps in that list.
4981841Spraks */
4991841Spraks if (allocq->vpmq_free == vpm)
5001841Spraks /*
5011841Spraks * Took the last one
5021841Spraks */
5031841Spraks allocq->vpmq_free = NULL;
5041841Spraks else {
5051841Spraks vpm->vpm_prev->vpm_next = vpm->vpm_next;
5061841Spraks vpm->vpm_next->vpm_prev = vpm->vpm_prev;
5071841Spraks }
5081841Spraks mutex_exit(&allocq->vpmq_mtx);
5091841Spraks vpm->vpm_prev = vpm->vpm_next = NULL;
5101841Spraks
5111841Spraks /*
512*9281SPrakash.Sangappa@Sun.COM * Disassociate the previous page.
5131841Spraks * p_vpmref is used as a mapping reference to the page.
5141841Spraks */
5151841Spraks if ((pp = vpm->vpm_pp) != NULL &&
516*9281SPrakash.Sangappa@Sun.COM vpm->vpm_vp == pp->p_vnode &&
517*9281SPrakash.Sangappa@Sun.COM vpm->vpm_off == pp->p_offset) {
5181841Spraks
5191841Spraks pmtx = PPMTX(pp);
5201841Spraks if (page_trylock(pp, SE_SHARED)) {
5211841Spraks /*
5221841Spraks * Now verify that it is the correct
5231841Spraks * page. If not someone else stole it,
5241841Spraks * so just unlock it and leave.
5251841Spraks */
5261841Spraks mutex_enter(pmtx);
5271841Spraks if (PP_ISFREE(pp) ||
528*9281SPrakash.Sangappa@Sun.COM vpm->vpm_vp != pp->p_vnode ||
529*9281SPrakash.Sangappa@Sun.COM vpm->vpm_off != pp->p_offset ||
530*9281SPrakash.Sangappa@Sun.COM pp->p_vpmref != VPMID(vpm)) {
5311841Spraks mutex_exit(pmtx);
5321841Spraks
5331841Spraks page_unlock(pp);
5341841Spraks } else {
5351841Spraks /*
5361841Spraks * Release the page.
5371841Spraks */
5381841Spraks pp->p_vpmref = 0;
5391841Spraks mutex_exit(pmtx);
5401841Spraks (void) page_release(pp, 1);
5411841Spraks }
5421841Spraks } else {
5431841Spraks /*
5441841Spraks * If the page cannot be locked, just
5451841Spraks * clear the p_vpmref and go.
5461841Spraks */
5471841Spraks mutex_enter(pmtx);
5481841Spraks if (pp->p_vpmref == VPMID(vpm)) {
5491841Spraks pp->p_vpmref = 0;
5501841Spraks }
5511841Spraks mutex_exit(pmtx);
5521841Spraks VPM_DEBUG(vpmd_prevpagelocked);
5531841Spraks }
5541841Spraks }
5551841Spraks
5561841Spraks /*
5571841Spraks * Setup vpm to point to the new page.
5581841Spraks */
5591841Spraks vpm->vpm_pp = newpage;
5601841Spraks vpm->vpm_vp = newpage->p_vnode;
5611841Spraks vpm->vpm_off = newpage->p_offset;
5621841Spraks
5631841Spraks } else {
5641841Spraks int steal = !VPM_MTBF(steals, steals_mtbf);
5651841Spraks /*
5661841Spraks * Page already has a vpm assigned just use that.
5671841Spraks * Grab the vpm mutex and verify that it is still
5681841Spraks * the correct one. The pp->p_vpmref should not change
5691841Spraks * once we have the vpm mutex and the page lock.
5701841Spraks */
5711841Spraks mutex_exit(&allocq->vpmq_mtx);
5721841Spraks vpm = VPMP(vpmref);
5731841Spraks vmtx = VPMAPMTX(vpm);
5741841Spraks mutex_enter(vmtx);
5751841Spraks if ((steal && vpm->vpm_refcnt == 0) ||
5761841Spraks vpm->vpm_pp != newpage) {
5771841Spraks /*
5781841Spraks * The vpm got stolen, retry.
5791841Spraks * clear the p_vpmref.
5801841Spraks */
5811841Spraks pmtx = PPMTX(newpage);
5821841Spraks mutex_enter(pmtx);
5831841Spraks if (newpage->p_vpmref == vpmref) {
5841841Spraks newpage->p_vpmref = 0;
5851841Spraks }
5861841Spraks mutex_exit(pmtx);
5871841Spraks
5881841Spraks mutex_exit(vmtx);
5891841Spraks VPM_DEBUG(vpmd_steals);
5901841Spraks goto retry_queue;
5911841Spraks } else if (vpm->vpm_refcnt == 0) {
5921841Spraks /*
5931841Spraks * Remove it from the free list if it
5941841Spraks * exists there.
5951841Spraks */
5961841Spraks VPMAP_RMFREELIST(vpm);
5971841Spraks }
5981841Spraks }
5991841Spraks return (vpm);
6001841Spraks }
6011841Spraks }
6021841Spraks
6031841Spraks static void
free_vpmap(struct vpmap * vpm)6041841Spraks free_vpmap(struct vpmap *vpm)
6051841Spraks {
6061841Spraks struct vpmfree *vpmflp;
6071841Spraks struct vpmap *vpmfreelist;
6081841Spraks union vpm_freeq *releq;
6091841Spraks
6101841Spraks ASSERT(MUTEX_HELD(VPMAPMTX(vpm)));
6111841Spraks
6121841Spraks if (vpm->vpm_refcnt != 0) {
6131841Spraks panic("free_vpmap");
6141841Spraks /*NOTREACHED*/
6151841Spraks }
6161841Spraks
6171841Spraks vpmflp = &vpmd_free[vpm->vpm_free_ndx];
6181841Spraks /*
6191841Spraks * Add to the tail of the release queue
6201841Spraks * Note that vpm_releq and vpm_allocq could toggle
6211841Spraks * before we get the lock. This does not affect
6221841Spraks * correctness as the 2 queues are only maintained
6231841Spraks * to reduce lock pressure.
6241841Spraks */
6251841Spraks releq = vpmflp->vpm_releq;
6261841Spraks if (releq == &vpmflp->vpm_freeq[0]) {
6271841Spraks vpm->vpm_ndxflg = 0;
6281841Spraks } else {
6291841Spraks vpm->vpm_ndxflg = 1;
6301841Spraks }
6311841Spraks mutex_enter(&releq->vpmq_mtx);
6321841Spraks vpmfreelist = releq->vpmq_free;
6331841Spraks if (vpmfreelist == 0) {
6341841Spraks int want;
6351841Spraks
6361841Spraks releq->vpmq_free = vpm->vpm_next = vpm->vpm_prev = vpm;
6371841Spraks /*
6381841Spraks * Both queue mutexes are held to set vpm_want;
6391841Spraks * snapshot the value before dropping releq mutex.
6401841Spraks * If vpm_want appears after the releq mutex is dropped,
6411841Spraks * then the vpmap just freed is already gone.
6421841Spraks */
6431841Spraks want = vpmflp->vpm_want;
6441841Spraks mutex_exit(&releq->vpmq_mtx);
6451841Spraks /*
6461841Spraks * See if there was a waiter before dropping the releq mutex
6471841Spraks * then recheck after obtaining vpm_freeq[0] mutex as
6481841Spraks * the another thread may have already signaled.
6491841Spraks */
6501841Spraks if (want) {
6511841Spraks mutex_enter(&vpmflp->vpm_freeq[0].vpmq_mtx);
6521841Spraks if (vpmflp->vpm_want)
6531841Spraks cv_signal(&vpmflp->vpm_free_cv);
6541841Spraks mutex_exit(&vpmflp->vpm_freeq[0].vpmq_mtx);
6551841Spraks }
6561841Spraks } else {
6571841Spraks vpm->vpm_next = vpmfreelist;
6581841Spraks vpm->vpm_prev = vpmfreelist->vpm_prev;
6591841Spraks vpmfreelist->vpm_prev = vpm;
6601841Spraks vpm->vpm_prev->vpm_next = vpm;
6611841Spraks mutex_exit(&releq->vpmq_mtx);
6621841Spraks }
6631841Spraks }
6641841Spraks
6651841Spraks /*
6661841Spraks * Get the vpmap for the page.
6671841Spraks * The refcnt of this vpm is incremented.
6681841Spraks */
6691841Spraks static struct vpmap *
get_vpmap(page_t * pp)6701841Spraks get_vpmap(page_t *pp)
6711841Spraks {
6721841Spraks struct vpmap *vpm = NULL;
6731841Spraks kmutex_t *vmtx;
6741841Spraks kmutex_t *pmtx;
6751841Spraks unsigned int refid;
6761841Spraks
6771841Spraks ASSERT((pp != NULL) && PAGE_LOCKED(pp));
6781841Spraks
6791841Spraks if (VPM_MTBF(contend, contend_mtbf) && (refid = pp->p_vpmref) != 0) {
6801841Spraks vpm = VPMP(refid);
6811841Spraks vmtx = VPMAPMTX(vpm);
6821841Spraks mutex_enter(vmtx);
6831841Spraks /*
6841841Spraks * Since we have the page lock and the vpm mutex, the
6851841Spraks * pp->p_vpmref cannot change.
6861841Spraks */
6871841Spraks if (vpm->vpm_pp != pp) {
6881841Spraks pmtx = PPMTX(pp);
6891841Spraks
6901841Spraks /*
6911841Spraks * Clear the p_vpmref as it is incorrect.
6921841Spraks * This can happen if the page was stolen.
6931841Spraks * On x64 this should not happen as p_vpmref
6941841Spraks * is treated as a mapping on the page. So
6951841Spraks * if the page is stolen, the mapping would have
6961841Spraks * been cleared in page_unload().
6971841Spraks */
6981841Spraks mutex_enter(pmtx);
6991841Spraks if (pp->p_vpmref == refid)
7001841Spraks pp->p_vpmref = 0;
7011841Spraks mutex_exit(pmtx);
7021841Spraks
7031841Spraks mutex_exit(vmtx);
7041841Spraks vpm = NULL;
7051841Spraks } else if (vpm->vpm_refcnt == 0) {
7061841Spraks /*
7071841Spraks * Got the vpm, remove it from the free
7081841Spraks * list if it exists there.
7091841Spraks */
7101841Spraks VPMAP_RMFREELIST(vpm);
7111841Spraks }
7121841Spraks }
7131841Spraks if (vpm == NULL) {
7141841Spraks /*
7151841Spraks * get_free_vpmap() returns with the vpmap mutex held.
7161841Spraks */
7171841Spraks vpm = get_free_vpmap(pp);
7181841Spraks vmtx = VPMAPMTX(vpm);
7191841Spraks vpmd_cpu[CPU->cpu_seqid].vcpu.vcpu_misses++;
7201841Spraks } else {
7211841Spraks vpmd_cpu[CPU->cpu_seqid].vcpu.vcpu_hits++;
7221841Spraks }
7231841Spraks
7241841Spraks vpm->vpm_refcnt++;
7251841Spraks mutex_exit(vmtx);
7261841Spraks
7271841Spraks return (vpm);
7281841Spraks }
7291841Spraks
7301841Spraks /* END --- vpm cache ---- */
7311841Spraks
7321841Spraks /*
7331841Spraks * The vnode page mapping(vpm) interface routines.
7341841Spraks */
7351841Spraks
7361841Spraks /*
7371841Spraks * Find or create the pages starting form baseoff for specified
7381841Spraks * length 'len'.
7391841Spraks */
7401841Spraks static int
vpm_pagecreate(struct vnode * vp,u_offset_t baseoff,size_t len,vmap_t vml[],int nseg,int * newpage)7411841Spraks vpm_pagecreate(
7421841Spraks struct vnode *vp,
7431841Spraks u_offset_t baseoff,
7441841Spraks size_t len,
7451841Spraks vmap_t vml[],
7461841Spraks int nseg,
7471841Spraks int *newpage)
7481841Spraks {
7491841Spraks
7501841Spraks page_t *pp = NULL;
7511841Spraks caddr_t base;
7521841Spraks u_offset_t off = baseoff;
7531841Spraks int i;
754*9281SPrakash.Sangappa@Sun.COM ASSERT(nseg >= MINVMAPS && nseg <= MAXVMAPS);
7551841Spraks
7562353Spraks for (i = 0; len > 0; len -= PAGESIZE, i++) {
7571841Spraks struct vpmap *vpm;
7581841Spraks
7591841Spraks
7601841Spraks if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL) {
7611841Spraks
7621841Spraks base = segkpm_create_va(off);
7631841Spraks
7641841Spraks /*
7651841Spraks * the seg pointer passed in is just advisor. Just
7661841Spraks * pass segkmap for now like segmap does with
7671841Spraks * segmap_kpm enabled.
7681841Spraks */
7691841Spraks if ((pp = page_create_va(vp, off, PAGESIZE, PG_WAIT,
7701841Spraks segkmap, base)) == NULL) {
7711841Spraks panic("segmap_pagecreate_vpm: "
7721841Spraks "page_create failed");
7731841Spraks /*NOTREACHED*/
7741841Spraks }
7751841Spraks if (newpage != NULL)
7761841Spraks *newpage = 1;
7771841Spraks
7781841Spraks page_io_unlock(pp);
7791841Spraks }
7801841Spraks
7811841Spraks /*
7821841Spraks * Get the vpm for this page_t.
7831841Spraks */
7841841Spraks if (vpm_cache_enable) {
7851841Spraks vpm = get_vpmap(pp);
7861841Spraks vml[i].vs_data = (void *)&vpm->vpm_pp;
7871841Spraks } else {
7881841Spraks vml[i].vs_data = (void *)pp;
7891841Spraks pp->p_vpmref = 0;
7901841Spraks }
7911841Spraks
7921841Spraks vml[i].vs_addr = hat_kpm_mapin(pp, 0);
7931841Spraks vml[i].vs_len = PAGESIZE;
7941841Spraks
7951841Spraks off += PAGESIZE;
7961841Spraks }
7971841Spraks vml[i].vs_data = NULL;
7981841Spraks vml[i].vs_addr = (caddr_t)NULL;
7991841Spraks return (0);
8001841Spraks }
8011841Spraks
8021841Spraks
8031841Spraks /*
8041841Spraks * Returns vpm mappings of pages in the range [off, off+len], where
8051841Spraks * len is rounded up to the PAGESIZE boundary. The list of pages and
8061841Spraks * the page addresses are returned in the SGL vml (vmap_t) array passed in.
8071841Spraks * The nseg is the number of vmap_t entries in the array.
8081841Spraks *
8091841Spraks * The segmap's SM_LOCKPROTO usage is not supported by these interfaces.
8101841Spraks * For such cases, use the seg_map interfaces.
8111841Spraks */
8121841Spraks int
vpm_map_pages(struct vnode * vp,u_offset_t off,size_t len,int fetchpage,vmap_t * vml,int nseg,int * newpage,enum seg_rw rw)8131841Spraks vpm_map_pages(
8141841Spraks struct vnode *vp,
8151841Spraks u_offset_t off,
8161841Spraks size_t len,
8171841Spraks int fetchpage,
8181841Spraks vmap_t *vml,
8191841Spraks int nseg,
8201841Spraks int *newpage,
8211841Spraks enum seg_rw rw)
8221841Spraks {
8231841Spraks extern struct vnode *common_specvp();
8241841Spraks u_offset_t baseoff;
8251841Spraks uint_t prot;
8261841Spraks caddr_t base;
8271841Spraks page_t *pp, *pplist[MAXVMAPS];
8281841Spraks struct vpmap *vpm;
8291841Spraks int i, error = 0;
830*9281SPrakash.Sangappa@Sun.COM size_t tlen;
8311841Spraks
832*9281SPrakash.Sangappa@Sun.COM ASSERT(nseg >= MINVMAPS && nseg <= MAXVMAPS);
8331841Spraks baseoff = off & (offset_t)PAGEMASK;
8341841Spraks vml[0].vs_data = NULL;
8351841Spraks vml[0].vs_addr = (caddr_t)NULL;
836*9281SPrakash.Sangappa@Sun.COM
837*9281SPrakash.Sangappa@Sun.COM tlen = P2ROUNDUP(off + len, PAGESIZE) - baseoff;
8381841Spraks /*
839*9281SPrakash.Sangappa@Sun.COM * Restrict it to VPMMAXLEN.
8401841Spraks */
841*9281SPrakash.Sangappa@Sun.COM if (tlen > (VPMMAXPGS * PAGESIZE)) {
842*9281SPrakash.Sangappa@Sun.COM tlen = VPMMAXPGS * PAGESIZE;
8431841Spraks }
844*9281SPrakash.Sangappa@Sun.COM /*
845*9281SPrakash.Sangappa@Sun.COM * Ensure length fits within the vml[] array. One element of
846*9281SPrakash.Sangappa@Sun.COM * the array is used to mark the end of the scatter/gather list
847*9281SPrakash.Sangappa@Sun.COM * of valid mappings by setting its vs_addr = NULL. Leave space
848*9281SPrakash.Sangappa@Sun.COM * for this element.
849*9281SPrakash.Sangappa@Sun.COM */
850*9281SPrakash.Sangappa@Sun.COM if (tlen > ((nseg - 1) * PAGESIZE)) {
851*9281SPrakash.Sangappa@Sun.COM tlen = ((nseg - 1) * PAGESIZE);
852*9281SPrakash.Sangappa@Sun.COM }
853*9281SPrakash.Sangappa@Sun.COM len = tlen;
8541841Spraks
8551841Spraks /*
8561841Spraks * If this is a block device we have to be sure to use the
8571841Spraks * "common" block device vnode for the mapping.
8581841Spraks */
8591841Spraks if (vp->v_type == VBLK)
8601841Spraks vp = common_specvp(vp);
8611841Spraks
8621841Spraks
8631841Spraks if (!fetchpage)
8641841Spraks return (vpm_pagecreate(vp, baseoff, len, vml, nseg, newpage));
8651841Spraks
8662353Spraks for (i = 0; len > 0; len -= PAGESIZE, i++, pplist[i] = NULL) {
8671841Spraks
8681841Spraks pp = page_lookup(vp, baseoff, SE_SHARED);
8691841Spraks
8701841Spraks /*
8711841Spraks * If we did not find the page or if this page was not
872*9281SPrakash.Sangappa@Sun.COM * in vpm cache(p_vpmref == 0), then let VOP_GETPAGE get
873*9281SPrakash.Sangappa@Sun.COM * all the pages.
8741841Spraks * We need to call VOP_GETPAGE so that filesytems can do some
8751841Spraks * (un)necessary tracking for sequential access.
8761841Spraks */
8771841Spraks
8781841Spraks if (pp == NULL || (vpm_cache_enable && pp->p_vpmref == 0) ||
879*9281SPrakash.Sangappa@Sun.COM (rw == S_WRITE && hat_page_getattr(pp, P_MOD | P_REF)
880*9281SPrakash.Sangappa@Sun.COM != (P_MOD | P_REF))) {
881*9281SPrakash.Sangappa@Sun.COM int j;
8821841Spraks if (pp != NULL) {
8831841Spraks page_unlock(pp);
8841841Spraks }
885*9281SPrakash.Sangappa@Sun.COM /*
886*9281SPrakash.Sangappa@Sun.COM * If we did not find the desired set of pages,
887*9281SPrakash.Sangappa@Sun.COM * from the page cache, just call VOP_GETPAGE to get
888*9281SPrakash.Sangappa@Sun.COM * all the pages.
889*9281SPrakash.Sangappa@Sun.COM */
890*9281SPrakash.Sangappa@Sun.COM for (j = 0; j < i; j++) {
891*9281SPrakash.Sangappa@Sun.COM page_unlock(pplist[j]);
892*9281SPrakash.Sangappa@Sun.COM }
8931841Spraks
894*9281SPrakash.Sangappa@Sun.COM
895*9281SPrakash.Sangappa@Sun.COM baseoff = off & (offset_t)PAGEMASK;
8961841Spraks /*
8971841Spraks * Pass a dummy address as it will be required
8981841Spraks * by page_create_va(). We pass segkmap as the seg
8991841Spraks * as some file systems(UFS) check it.
9001841Spraks */
9011841Spraks base = segkpm_create_va(baseoff);
9021841Spraks
903*9281SPrakash.Sangappa@Sun.COM error = VOP_GETPAGE(vp, baseoff, tlen, &prot, pplist,
904*9281SPrakash.Sangappa@Sun.COM tlen, segkmap, base, rw, CRED(), NULL);
9051841Spraks if (error) {
9061841Spraks VPM_DEBUG(vpmd_getpagefailed);
907*9281SPrakash.Sangappa@Sun.COM pplist[0] = NULL;
9081841Spraks }
9091841Spraks break;
9101841Spraks } else {
9111841Spraks pplist[i] = pp;
9121841Spraks baseoff += PAGESIZE;
9131841Spraks }
9141841Spraks }
9151841Spraks
9161841Spraks if (error) {
9171841Spraks for (i = 0; pplist[i] != NULL; i++) {
9181841Spraks page_unlock(pplist[i]);
9191841Spraks pplist[i] = NULL;
9201841Spraks }
9211841Spraks vml[0].vs_addr = NULL;
9221841Spraks vml[0].vs_data = NULL;
9232970Spraks return (error);
9241841Spraks }
9251841Spraks
9261841Spraks /*
9271841Spraks * Get the vpm's for pages.
9281841Spraks */
9291841Spraks for (i = 0; pplist[i] != NULL; i++) {
9301841Spraks if (vpm_cache_enable) {
9311841Spraks vpm = get_vpmap(pplist[i]);
9321841Spraks vml[i].vs_data = (void *)&(vpm->vpm_pp);
9331841Spraks } else {
9341841Spraks vml[i].vs_data = (void *)pplist[i];
9351841Spraks pplist[i]->p_vpmref = 0;
9361841Spraks }
9371841Spraks
9381841Spraks vml[i].vs_addr = hat_kpm_mapin(pplist[i], 0);
9391841Spraks vml[i].vs_len = PAGESIZE;
9401841Spraks }
9411841Spraks
9421841Spraks vml[i].vs_data = NULL;
9431841Spraks vml[i].vs_addr = (caddr_t)NULL;
9441841Spraks
9451841Spraks return (0);
9461841Spraks }
9471841Spraks
9481841Spraks /*
9491841Spraks * Release the vpm mappings on the pages and unlock them.
9501841Spraks */
9511841Spraks void
vpm_unmap_pages(vmap_t vml[],enum seg_rw rw)9521841Spraks vpm_unmap_pages(vmap_t vml[], enum seg_rw rw)
9531841Spraks {
9541841Spraks int i;
9551841Spraks struct vpmap *vpm;
9561841Spraks kmutex_t *mtx;
9571841Spraks page_t *pp;
9581841Spraks
9591841Spraks for (i = 0; vml[i].vs_data != NULL; i++) {
9601841Spraks ASSERT(IS_KPM_ADDR(vml[i].vs_addr));
9611841Spraks
9621841Spraks if (vpm_cache_enable) {
9631841Spraks pp = *(((page_t **)vml[i].vs_data));
9641841Spraks } else {
9651841Spraks pp = (page_t *)vml[i].vs_data;
9661841Spraks }
9671841Spraks
9681841Spraks /*
9691841Spraks * Mark page as being modified or referenced, bacause vpm pages
9701841Spraks * would not cause faults where it would be set normally.
9711841Spraks */
9721841Spraks if (rw == S_WRITE) {
9731841Spraks hat_setrefmod(pp);
9741841Spraks } else {
9751841Spraks ASSERT(rw == S_READ);
9761841Spraks hat_setref(pp);
9771841Spraks }
9781841Spraks
9791841Spraks if (vpm_cache_enable) {
980*9281SPrakash.Sangappa@Sun.COM vpm = (struct vpmap *)((char *)vml[i].vs_data
981*9281SPrakash.Sangappa@Sun.COM - offsetof(struct vpmap, vpm_pp));
982*9281SPrakash.Sangappa@Sun.COM hat_kpm_mapout(pp, 0, vml[i].vs_addr);
9831841Spraks page_unlock(pp);
9841841Spraks mtx = VPMAPMTX(vpm);
9851841Spraks mutex_enter(mtx);
9861841Spraks
9871841Spraks if (--vpm->vpm_refcnt == 0) {
9881841Spraks free_vpmap(vpm);
9891841Spraks }
9901841Spraks mutex_exit(mtx);
9911841Spraks } else {
9921841Spraks hat_kpm_mapout(pp, 0, vml[i].vs_addr);
9931841Spraks (void) page_release(pp, 1);
9941841Spraks }
9951841Spraks vml[i].vs_data = NULL;
9961841Spraks vml[i].vs_addr = NULL;
9971841Spraks }
9981841Spraks }
9991841Spraks
10001841Spraks /*
10011841Spraks * Given the vp, off and the uio structure, this routine will do the
10021841Spraks * the copy (uiomove). If the last page created is partially written,
10031841Spraks * the rest of the page is zeroed out. It also zeros the beginning of
10041841Spraks * the first page till the start offset if requested(zerostart).
10051841Spraks * If pages are to be fetched, it will call the filesystem's getpage
10061841Spraks * function (VOP_GETPAGE) to get them, otherwise they will be created if
10071841Spraks * not already present in the page cache.
10081841Spraks */
10091841Spraks int
vpm_data_copy(struct vnode * vp,u_offset_t off,size_t len,struct uio * uio,int fetchpage,int * newpage,int zerostart,enum seg_rw rw)10101841Spraks vpm_data_copy(struct vnode *vp,
10111841Spraks u_offset_t off,
10121841Spraks size_t len,
10131841Spraks struct uio *uio,
10141841Spraks int fetchpage,
10151841Spraks int *newpage,
10161841Spraks int zerostart,
10171841Spraks enum seg_rw rw)
10181841Spraks {
10191841Spraks int error;
10201841Spraks struct vmap vml[MINVMAPS];
10211841Spraks enum uio_rw uiorw;
10221841Spraks int npages = 0;
10231841Spraks
10241841Spraks uiorw = (rw == S_WRITE) ? UIO_WRITE : UIO_READ;
10251841Spraks /*
10261841Spraks * 'off' will be the offset where the I/O starts.
10271841Spraks * We get the pages starting at the (off & PAGEMASK)
10281841Spraks * page boundary.
10291841Spraks */
10301841Spraks error = vpm_map_pages(vp, off, (uint_t)len,
1031*9281SPrakash.Sangappa@Sun.COM fetchpage, vml, MINVMAPS, &npages, rw);
10321841Spraks
10331841Spraks if (newpage != NULL)
10341841Spraks *newpage = npages;
10351841Spraks if (!error) {
10361841Spraks int i, pn, slen = len;
10371841Spraks int pon = off & PAGEOFFSET;
10381841Spraks
10391841Spraks /*
10401841Spraks * Clear from the beginning of the page to start offset
10411841Spraks * if requested.
10421841Spraks */
10431841Spraks if (!fetchpage && zerostart) {
10441841Spraks (void) kzero(vml[0].vs_addr, (uint_t)pon);
10451841Spraks VPM_DEBUG(vpmd_zerostart);
10461841Spraks }
10471841Spraks
10481841Spraks for (i = 0; !error && slen > 0 &&
1049*9281SPrakash.Sangappa@Sun.COM vml[i].vs_addr != NULL; i++) {
10501841Spraks pn = (int)MIN(slen, (PAGESIZE - pon));
10511841Spraks error = uiomove(vml[i].vs_addr + pon,
1052*9281SPrakash.Sangappa@Sun.COM (long)pn, uiorw, uio);
10531841Spraks slen -= pn;
10541841Spraks pon = 0;
10551841Spraks }
10561841Spraks
10571841Spraks /*
10581841Spraks * When new pages are created, zero out part of the
10591841Spraks * page we did not copy to.
10601841Spraks */
10611841Spraks if (!fetchpage && npages &&
1062*9281SPrakash.Sangappa@Sun.COM uio->uio_loffset < roundup(off + len, PAGESIZE)) {
10631841Spraks int nzero;
10641841Spraks
10651841Spraks pon = (uio->uio_loffset & PAGEOFFSET);
10661841Spraks nzero = PAGESIZE - pon;
10671841Spraks i = (uio->uio_loffset - (off & PAGEMASK)) / PAGESIZE;
10681841Spraks (void) kzero(vml[i].vs_addr + pon, (uint_t)nzero);
10691841Spraks }
10701841Spraks vpm_unmap_pages(vml, rw);
10711841Spraks }
10721841Spraks return (error);
10731841Spraks }
10741841Spraks
10751841Spraks /*
10761841Spraks * called to flush pages for the given vnode covering
10771841Spraks * [off, off+len] range.
10781841Spraks */
10791841Spraks int
vpm_sync_pages(struct vnode * vp,u_offset_t off,size_t len,uint_t flags)10801841Spraks vpm_sync_pages(struct vnode *vp,
10811841Spraks u_offset_t off,
10821841Spraks size_t len,
10831841Spraks uint_t flags)
10841841Spraks {
10851841Spraks extern struct vnode *common_specvp();
10861841Spraks int bflags = 0;
10871841Spraks int error = 0;
10881841Spraks size_t psize = roundup(len, PAGESIZE);
10891841Spraks
10901841Spraks /*
10911841Spraks * If this is a block device we have to be sure to use the
10921841Spraks * "common" block device vnode for the mapping.
10931841Spraks */
10941841Spraks if (vp->v_type == VBLK)
10951841Spraks vp = common_specvp(vp);
10961841Spraks
10971841Spraks if ((flags & ~SM_DONTNEED) != 0) {
10981841Spraks if (flags & SM_ASYNC)
10991841Spraks bflags |= B_ASYNC;
11001841Spraks if (flags & SM_INVAL)
11011841Spraks bflags |= B_INVAL;
11021841Spraks if (flags & SM_DESTROY)
11031841Spraks bflags |= (B_INVAL|B_TRUNC);
11041841Spraks if (flags & SM_FREE)
11051841Spraks bflags |= B_FREE;
11061841Spraks if (flags & SM_DONTNEED)
11071841Spraks bflags |= B_DONTNEED;
11081841Spraks
11095331Samw error = VOP_PUTPAGE(vp, off, psize, bflags, CRED(), NULL);
11101841Spraks }
11111841Spraks
11121841Spraks return (error);
11131841Spraks }
11141841Spraks
11151841Spraks
11161841Spraks #else /* SEGKPM_SUPPORT */
11171841Spraks
11181841Spraks /* vpm stubs */
/*
 * Stub for the build without SEGKPM_SUPPORT: nothing to initialize.
 * Declared with an explicit (void) parameter list so the definition is
 * a real prototype rather than an old-style declarator.
 */
void
vpm_init(void)
{
}
11231841Spraks
11241841Spraks /*ARGSUSED*/
11251841Spraks int
vpm_pagecreate(struct vnode * vp,u_offset_t baseoff,size_t len,vmap_t vml[],int nseg,int * newpage)11261841Spraks vpm_pagecreate(
11271841Spraks struct vnode *vp,
11281841Spraks u_offset_t baseoff,
11291841Spraks size_t len,
11301841Spraks vmap_t vml[],
11311841Spraks int nseg,
11321841Spraks int *newpage)
11331841Spraks {
11341841Spraks return (0);
11351841Spraks }
11361841Spraks
11371841Spraks /*ARGSUSED*/
11381841Spraks int
vpm_map_pages(struct vnode * vp,u_offset_t off,size_t len,int fetchpage,vmap_t vml[],int nseg,int * newpage,enum seg_rw rw)11391841Spraks vpm_map_pages(
11401841Spraks struct vnode *vp,
11411841Spraks u_offset_t off,
11421841Spraks size_t len,
11431841Spraks int fetchpage,
11441841Spraks vmap_t vml[],
11451841Spraks int nseg,
11461841Spraks int *newpage,
11471841Spraks enum seg_rw rw)
11481841Spraks {
11491841Spraks return (0);
11501841Spraks }
11511841Spraks
11521841Spraks /*ARGSUSED*/
11531841Spraks int
vpm_data_copy(struct vnode * vp,u_offset_t off,size_t len,struct uio * uio,int fetchpage,int * newpage,int zerostart,enum seg_rw rw)11541841Spraks vpm_data_copy(struct vnode *vp,
11551841Spraks u_offset_t off,
11561841Spraks size_t len,
11571841Spraks struct uio *uio,
11581841Spraks int fetchpage,
11591841Spraks int *newpage,
11601841Spraks int zerostart,
11611841Spraks enum seg_rw rw)
11621841Spraks {
11631841Spraks return (0);
11641841Spraks }
11651841Spraks
11661841Spraks /*ARGSUSED*/
11671841Spraks void
vpm_unmap_pages(vmap_t vml[],enum seg_rw rw)11681841Spraks vpm_unmap_pages(vmap_t vml[], enum seg_rw rw)
11691841Spraks {
11701841Spraks }
11711841Spraks /*ARGSUSED*/
11721841Spraks int
vpm_sync_pages(struct vnode * vp,u_offset_t off,size_t len,uint_t flags)11731841Spraks vpm_sync_pages(struct vnode *vp,
11741841Spraks u_offset_t off,
11751841Spraks size_t len,
11761841Spraks uint_t flags)
11771841Spraks {
11781841Spraks return (0);
11791841Spraks }
11801841Spraks #endif /* SEGKPM_SUPPORT */
1181