/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * VM - generic vnode page mapping interfaces.
 *
 * Mechanism to provide temporary mappings to vnode pages.
 * The typical use would be to copy/access file data.
 */

#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/buf.h>
#include <sys/systm.h>
#include <sys/vnode.h>
#include <sys/mman.h>
#include <sys/errno.h>
#include <sys/cred.h>
#include <sys/kmem.h>
#include <sys/vtrace.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/thread.h>
#include <sys/dumphdr.h>
#include <sys/bitmap.h>
#include <sys/lgrp.h>

#include <vm/seg_kmem.h>
#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_kpm.h>
#include <vm/seg_map.h>
#include <vm/page.h>
#include <vm/pvn.h>
#include <vm/rm.h>
#include <vm/vpm.h>

/*
 * Needs to be enabled by each platform.
 */
int vpm_enable = 0;

#ifdef	SEGKPM_SUPPORT

int vpm_cache_enable = 1;
long vpm_cache_percent = 12;
long vpm_cache_size;
int vpm_nfreelist = 0;
int vpmd_freemsk = 0;

/*
 * Per-CPU data is padded out (presumably to a 64-byte cache line) so that
 * counters belonging to different CPUs do not share a cache line.
 */
#define	VPM_S_PAD	64
union vpm_cpu {
	struct {
		int	vcpu_free_ndx;
		ulong_t	vcpu_hits;
		ulong_t	vcpu_misses;
	} vcpu;
	char	vpm_pad[VPM_S_PAD];
};
static union vpm_cpu	*vpmd_cpu;

#define	vfree_ndx	vcpu.vcpu_free_ndx

int	vpm_cachemode = VPMCACHE_LRU;

#define	PPMTX(pp)	(&(pp)->p_ilock)

static struct vpmap *vpmd_vpmap;	/* list of vpmap structs preallocated */
static struct vpmfree *vpmd_free;
#define	VPMAPMTX(vpm)		(&vpm->vpm_mtx)
#define	VPMAP2VMF(vpm)		(&vpmd_free[(vpm - vpmd_vpmap) & vpmd_freemsk])
#define	VPMAP2VMF_NDX(vpm)	(ushort_t)((vpm - vpmd_vpmap) & vpmd_freemsk)
#define	VPMP(id)		(&vpmd_vpmap[id - 1])
#define	VPMID(vpm)		(uint_t)((vpm - vpmd_vpmap) + 1)


#ifdef	DEBUG

struct vpm_debug {
	int vpmd_steals;
	int vpmd_contend;
	int vpmd_prevpagelocked;
	int vpmd_getpagefailed;
	int vpmd_zerostart;
	int vpmd_emptyfreelist;
	int vpmd_nofreevpms;
} vpm_debug;

#define	VPM_DEBUG(x)	((vpm_debug.x)++)

int	steals;
int	steals_mtbf = 7;
int	contend;
int	contend_mtbf = 127;

#define	VPM_MTBF(v, f)	(((++(v)) & (f)) != (f))

#else	/* DEBUG */

#define	VPM_MTBF(v, f)	(1)
#define	VPM_DEBUG(x)	/* nothing */

#endif
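
/*
 * Note on VPM_MTBF (added explanation): in DEBUG kernels it acts as a simple
 * fault-injection counter.  With f = 2^k - 1 the expression is false once
 * every 2^k increments of v; for example, with steals_mtbf = 7 the "steal"
 * path in get_free_vpmap() is forced roughly once every 8 calls (when the
 * vpm is otherwise idle) so that the retry logic gets exercised.  In
 * non-DEBUG kernels the macro is always true and no artificial failures
 * are injected.
 */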

/*
 * The vpm cache.
 *
 * The main purpose of having a cache here is to speed up page_lookup()
 * operations and also provide an LRU (default) behaviour of file pages.
 * The page_lookup() operation tends to be expensive if a page has to be
 * reclaimed from the system page cache ("cachelist").  Once we speed up the
 * page_lookup()->page_reclaim() path, there should be no need for this
 * cache.  The system page cache (cachelist) should effectively serve the
 * purpose of caching file pages.
 *
 * This cache is very similar to segmap's smap cache.  Each page in the
 * cache is tracked by the structure vpmap_t.  But unlike segmap, there is
 * no hash table.  The page_t has a reference to the vpmap_t when cached.
 * For a given vnode and offset, the page is found by means of a
 * page_lookup() operation.  Any page which has a mapping (i.e., when
 * cached) will not be in the system 'cachelist'.  Hence the page_lookup()
 * will not have to do a page_reclaim().  That is how the cache serves to
 * speed up page_lookup() operations.
 *
 * This cache can be disabled by setting vpm_cache_enable = 0 in /etc/system.
 */

void
vpm_init()
{
	long npages;
	struct vpmap *vpm;
	struct vpmfree *vpmflp;
	int i, ndx;
	extern void prefetch_smap_w(void *);

	if (!vpm_cache_enable) {
		return;
	}

	/*
	 * Set the size of the cache.
	 */
	vpm_cache_size = mmu_ptob((physmem * vpm_cache_percent)/100);
	if (vpm_cache_size < VPMAP_MINCACHE) {
		vpm_cache_size = VPMAP_MINCACHE;
	}
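
	/*
	 * Worked example (illustrative numbers only): with 4 GB of physical
	 * memory and the default vpm_cache_percent of 12, roughly 12% of
	 * physmem worth of pages (~490 MB) is tracked by vpmap structures.
	 * This only sizes the cache of mappings; it does not reserve that
	 * memory itself.
	 */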

	/*
	 * Number of freelists.
	 */
	if (vpm_nfreelist == 0) {
		vpm_nfreelist = max_ncpus;
	} else if (vpm_nfreelist < 0 || vpm_nfreelist > 2 * max_ncpus) {
		cmn_err(CE_WARN, "vpmap create : number of freelist "
		"vpm_nfreelist %d using %d", vpm_nfreelist, max_ncpus);
		vpm_nfreelist = 2 * max_ncpus;
	}

	/*
	 * Round it up to the next power of 2.
	 */
	if (vpm_nfreelist & (vpm_nfreelist - 1)) {
		vpm_nfreelist = 1 << (highbit(vpm_nfreelist));
	}
	vpmd_freemsk = vpm_nfreelist - 1;

	/*
	 * Use a per cpu rotor index to spread the allocations evenly
	 * across the available vpm freelists.
	 */
	vpmd_cpu = kmem_zalloc(sizeof (union vpm_cpu) * max_ncpus, KM_SLEEP);
	ndx = 0;
	for (i = 0; i < max_ncpus; i++) {

		vpmd_cpu[i].vfree_ndx = ndx;
		ndx = (ndx + 1) & vpmd_freemsk;
	}

	/*
	 * Allocate and initialize the freelist.
	 */
	vpmd_free = kmem_zalloc(vpm_nfreelist * sizeof (struct vpmfree),
	    KM_SLEEP);
	for (i = 0; i < vpm_nfreelist; i++) {

		vpmflp = &vpmd_free[i];
		/*
		 * Set up initial queue pointers. They will get flipped
		 * back and forth.
		 */
		vpmflp->vpm_allocq = &vpmflp->vpm_freeq[VPMALLOCQ];
		vpmflp->vpm_releq = &vpmflp->vpm_freeq[VPMRELEQ];
	}

	npages = mmu_btop(vpm_cache_size);


	/*
	 * Allocate and initialize the vpmap structs.
	 */
	vpmd_vpmap = kmem_zalloc(sizeof (struct vpmap) * npages, KM_SLEEP);
	for (vpm = vpmd_vpmap; vpm <= &vpmd_vpmap[npages - 1]; vpm++) {
		struct vpmfree *vpmflp;
		union vpm_freeq *releq;
		struct vpmap *vpmapf;

		/*
		 * Use prefetch as we have to walk through a large number of
		 * these data structures. We just use the smap's prefetch
		 * routine as it does the same. This should work fine
		 * for x64 (this needs to be modified when enabled on sparc).
		 */
		prefetch_smap_w((void *)vpm);

		vpm->vpm_free_ndx = VPMAP2VMF_NDX(vpm);

		vpmflp = VPMAP2VMF(vpm);
		releq = vpmflp->vpm_releq;

		vpmapf = releq->vpmq_free;
		if (vpmapf == NULL) {
			releq->vpmq_free = vpm->vpm_next = vpm->vpm_prev = vpm;
		} else {
			vpm->vpm_next = vpmapf;
			vpm->vpm_prev = vpmapf->vpm_prev;
			vpmapf->vpm_prev = vpm;
			vpm->vpm_prev->vpm_next = vpm;
			releq->vpmq_free = vpm->vpm_next;
		}

		/*
		 * Indicate that the vpmap is on the releq at start.
		 */
		vpm->vpm_ndxflg = VPMRELEQ;
	}
}


/*
 * Unhooks vpm from the freelist if it is still on the freelist.
 */
#define	VPMAP_RMFREELIST(vpm) \
	{ \
		if (vpm->vpm_next != NULL) { \
			union vpm_freeq *freeq; \
			struct vpmfree *vpmflp; \
			vpmflp = &vpmd_free[vpm->vpm_free_ndx]; \
			freeq = &vpmflp->vpm_freeq[vpm->vpm_ndxflg]; \
			mutex_enter(&freeq->vpmq_mtx); \
			if (freeq->vpmq_free != vpm) { \
				vpm->vpm_prev->vpm_next = vpm->vpm_next; \
				vpm->vpm_next->vpm_prev = vpm->vpm_prev; \
			} else if (vpm == vpm->vpm_next) { \
				freeq->vpmq_free = NULL; \
			} else { \
				freeq->vpmq_free = vpm->vpm_next; \
				vpm->vpm_prev->vpm_next = vpm->vpm_next; \
				vpm->vpm_next->vpm_prev = vpm->vpm_prev; \
			} \
			mutex_exit(&freeq->vpmq_mtx); \
			vpm->vpm_next = vpm->vpm_prev = NULL; \
		} \
	}

static int
get_freelndx(int mode)
{
	int ndx;

	ndx = vpmd_cpu[CPU->cpu_seqid].vfree_ndx & vpmd_freemsk;
	switch (mode) {

	case VPMCACHE_LRU:
	default:
		vpmd_cpu[CPU->cpu_seqid].vfree_ndx++;
		break;
	}
	return (ndx);
}

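
/*
 * A note on the freelist organization (summary derived from the code below):
 * each freelist bin keeps two queues, vpm_allocq and vpm_releq.
 * get_free_vpmap() only takes entries from the alloc queue and free_vpmap()
 * only adds entries to the rele queue, so allocation and free normally
 * contend on different mutexes.  When the alloc queue runs dry and the rele
 * queue has entries, the two queues are simply flipped.
 */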

/*
 * Find one vpmap structure from the free lists and use it for the newpage.
 * The previous page it cached is dissociated and released. The page_t's
 * p_vpmref is cleared only when the vpm it is pointing to is locked (or,
 * for AMD64, when the page is exclusively locked in page_unload; that is
 * because the p_vpmref is treated as a mapping).
 *
 * The page's p_vpmref is set when the page is
 * locked (at least SHARED locked).
 */
static struct vpmap *
get_free_vpmap(page_t *newpage)
{
	struct vpmfree *vpmflp;
	kmutex_t *vmtx;
	struct vpmap *vpm, *first;
	union vpm_freeq *allocq, *releq;
	page_t *pp = NULL;
	int end_ndx, page_locked = 0;
	int free_ndx;

	/*
	 * Get the freelist bin index.
	 */
	free_ndx = get_freelndx(vpm_cachemode);

	end_ndx = free_ndx;
	vpmflp = &vpmd_free[free_ndx];

retry_queue:
	allocq = vpmflp->vpm_allocq;
	mutex_enter(&allocq->vpmq_mtx);

	if ((vpm = allocq->vpmq_free) == NULL) {

skip_queue:
		/*
		 * The alloc list is empty or this queue is being skipped;
		 * first see if the allocq toggled.
		 */
		if (vpmflp->vpm_allocq != allocq) {
			/* queue changed */
			mutex_exit(&allocq->vpmq_mtx);
			goto retry_queue;
		}
		releq = vpmflp->vpm_releq;
		if (!mutex_tryenter(&releq->vpmq_mtx)) {
			/* cannot get releq; a free vpmap may be there now */
			mutex_exit(&allocq->vpmq_mtx);

			/*
			 * This loop could spin forever if this thread has
			 * higher priority than the thread that is holding
			 * releq->vpmq_mtx. In order to force the other thread
			 * to run, we'll lock/unlock the mutex which is safe
			 * since we just unlocked the allocq mutex.
			 */
			mutex_enter(&releq->vpmq_mtx);
			mutex_exit(&releq->vpmq_mtx);
			goto retry_queue;
		}
		if (releq->vpmq_free == NULL) {
			VPM_DEBUG(vpmd_emptyfreelist);
			/*
			 * This freelist is empty.
			 * This should not happen unless clients
			 * are failing to release the vpmap after
			 * accessing the data. Before resorting
			 * to sleeping, try the next list of the same color.
			 */
			free_ndx = (free_ndx + 1) & vpmd_freemsk;
			if (free_ndx != end_ndx) {
				mutex_exit(&releq->vpmq_mtx);
				mutex_exit(&allocq->vpmq_mtx);
				vpmflp = &vpmd_free[free_ndx];
				goto retry_queue;
			}
			/*
			 * Tried all freelists.
			 * Wait on this list and hope something gets freed.
			 */
			vpmflp->vpm_want++;
			mutex_exit(&vpmflp->vpm_freeq[1].vpmq_mtx);
			cv_wait(&vpmflp->vpm_free_cv,
			    &vpmflp->vpm_freeq[0].vpmq_mtx);
			vpmflp->vpm_want--;
			mutex_exit(&vpmflp->vpm_freeq[0].vpmq_mtx);
			vpmflp = &vpmd_free[free_ndx];
			VPM_DEBUG(vpmd_nofreevpms);
			goto retry_queue;
		} else {
			/*
			 * Something on the rele queue; flip the alloc
			 * and rele queues and retry.
			 */
			vpmflp->vpm_allocq = releq;
			vpmflp->vpm_releq = allocq;
			mutex_exit(&allocq->vpmq_mtx);
			mutex_exit(&releq->vpmq_mtx);
			if (page_locked) {
				delay(hz >> 2);
				page_locked = 0;
			}
			goto retry_queue;
		}
	} else {
		int gotnewvpm;
		kmutex_t *pmtx;
		uint_t vpmref;

		/*
		 * Fastpath the case we get the vpmap mutex
		 * on the first try.
		 */
		first = vpm;
next_vpmap:
		vmtx = VPMAPMTX(vpm);
		if (!mutex_tryenter(vmtx)) {
			/*
			 * Another thread is trying to reclaim this slot.
			 * Skip to the next queue or vpmap.
			 */
			if ((vpm = vpm->vpm_next) == first) {
				goto skip_queue;
			} else {
				goto next_vpmap;
			}
		}

		/*
		 * Assign this vpm to the newpage.
		 */
		pmtx = PPMTX(newpage);
		gotnewvpm = 0;
		mutex_enter(pmtx);

		/*
		 * Check if some other thread already assigned a vpm to
		 * this page.
		 */
		if ((vpmref = newpage->p_vpmref) == 0) {
			newpage->p_vpmref = VPMID(vpm);
			gotnewvpm = 1;
		} else {
			VPM_DEBUG(vpmd_contend);
			mutex_exit(vmtx);
		}
		mutex_exit(pmtx);

		if (gotnewvpm) {

			/*
			 * At this point, we've selected the vpm. Remove vpm
			 * from its freelist. If vpm is the first one in
			 * the freelist, update the head of the freelist.
			 */
			if (first == vpm) {
				ASSERT(first == allocq->vpmq_free);
				allocq->vpmq_free = vpm->vpm_next;
			}

			/*
			 * If the head of the freelist still points to vpm,
			 * then there are no more free vpmaps in that list.
			 */
			if (allocq->vpmq_free == vpm)
				/*
				 * Took the last one
				 */
				allocq->vpmq_free = NULL;
			else {
				vpm->vpm_prev->vpm_next = vpm->vpm_next;
				vpm->vpm_next->vpm_prev = vpm->vpm_prev;
			}
			mutex_exit(&allocq->vpmq_mtx);
			vpm->vpm_prev = vpm->vpm_next = NULL;

			/*
			 * Disassociate the previous page. On x64 systems
			 * p_vpmref is used as a mapping reference to the page.
			 */
			if ((pp = vpm->vpm_pp) != NULL &&
			    vpm->vpm_vp == pp->p_vnode &&
			    vpm->vpm_off == pp->p_offset) {

				pmtx = PPMTX(pp);
				if (page_trylock(pp, SE_SHARED)) {
					/*
					 * Now verify that it is the correct
					 * page. If not, someone else stole it,
					 * so just unlock it and leave.
					 */
					mutex_enter(pmtx);
					if (PP_ISFREE(pp) ||
					    vpm->vpm_vp != pp->p_vnode ||
					    vpm->vpm_off != pp->p_offset ||
					    pp->p_vpmref != VPMID(vpm)) {
						mutex_exit(pmtx);

						page_unlock(pp);
					} else {
						/*
						 * Release the page.
						 */
						pp->p_vpmref = 0;
						mutex_exit(pmtx);
						hat_kpm_mapout(pp, 0,
						    hat_kpm_page2va(pp, 1));
						(void) page_release(pp, 1);
					}
				} else {
					/*
					 * If the page cannot be locked, just
					 * clear the p_vpmref and go.
					 */
					mutex_enter(pmtx);
					if (pp->p_vpmref == VPMID(vpm)) {
						pp->p_vpmref = 0;
					}
					mutex_exit(pmtx);
					VPM_DEBUG(vpmd_prevpagelocked);
				}
			}

			/*
			 * Setup vpm to point to the new page.
			 */
			vpm->vpm_pp = newpage;
			vpm->vpm_vp = newpage->p_vnode;
			vpm->vpm_off = newpage->p_offset;

		} else {
			int steal = !VPM_MTBF(steals, steals_mtbf);
			/*
			 * Page already has a vpm assigned; just use that.
			 * Grab the vpm mutex and verify that it is still
			 * the correct one. The pp->p_vpmref should not change
			 * once we have the vpm mutex and the page lock.
			 */
			mutex_exit(&allocq->vpmq_mtx);
			vpm = VPMP(vpmref);
			vmtx = VPMAPMTX(vpm);
			mutex_enter(vmtx);
			if ((steal && vpm->vpm_refcnt == 0) ||
			    vpm->vpm_pp != newpage) {
				/*
				 * The vpm got stolen; clear the
				 * p_vpmref and retry.
				 */
				pmtx = PPMTX(newpage);
				mutex_enter(pmtx);
				if (newpage->p_vpmref == vpmref) {
					newpage->p_vpmref = 0;
				}
				mutex_exit(pmtx);

				mutex_exit(vmtx);
				VPM_DEBUG(vpmd_steals);
				goto retry_queue;
			} else if (vpm->vpm_refcnt == 0) {
				/*
				 * Remove it from the free list if it
				 * exists there.
				 */
				VPMAP_RMFREELIST(vpm);
			}
		}
		return (vpm);
	}
}

static void
free_vpmap(struct vpmap *vpm)
{
	struct vpmfree *vpmflp;
	struct vpmap *vpmfreelist;
	union vpm_freeq *releq;

	ASSERT(MUTEX_HELD(VPMAPMTX(vpm)));

	if (vpm->vpm_refcnt != 0) {
		panic("free_vpmap");
		/*NOTREACHED*/
	}

	vpmflp = &vpmd_free[vpm->vpm_free_ndx];
	/*
	 * Add to the tail of the release queue.
	 * Note that vpm_releq and vpm_allocq could toggle
	 * before we get the lock. This does not affect
	 * correctness as the 2 queues are only maintained
	 * to reduce lock pressure.
	 */
	releq = vpmflp->vpm_releq;
	if (releq == &vpmflp->vpm_freeq[0]) {
		vpm->vpm_ndxflg = 0;
	} else {
		vpm->vpm_ndxflg = 1;
	}
	mutex_enter(&releq->vpmq_mtx);
	vpmfreelist = releq->vpmq_free;
	if (vpmfreelist == 0) {
		int want;

		releq->vpmq_free = vpm->vpm_next = vpm->vpm_prev = vpm;
		/*
		 * Both queue mutexes are held to set vpm_want;
		 * snapshot the value before dropping releq mutex.
		 * If vpm_want appears after the releq mutex is dropped,
		 * then the vpmap just freed is already gone.
		 */
		want = vpmflp->vpm_want;
		mutex_exit(&releq->vpmq_mtx);
		/*
		 * See if there was a waiter before dropping the releq mutex,
		 * then recheck after obtaining the vpm_freeq[0] mutex as
		 * another thread may have already signaled.
		 */
		if (want) {
			mutex_enter(&vpmflp->vpm_freeq[0].vpmq_mtx);
			if (vpmflp->vpm_want)
				cv_signal(&vpmflp->vpm_free_cv);
			mutex_exit(&vpmflp->vpm_freeq[0].vpmq_mtx);
		}
	} else {
		vpm->vpm_next = vpmfreelist;
		vpm->vpm_prev = vpmfreelist->vpm_prev;
		vpmfreelist->vpm_prev = vpm;
		vpm->vpm_prev->vpm_next = vpm;
		mutex_exit(&releq->vpmq_mtx);
	}
}

/*
 * Get the vpmap for the page.
 * The refcnt of this vpm is incremented.
 */
static struct vpmap *
get_vpmap(page_t *pp)
{
	struct vpmap *vpm = NULL;
	kmutex_t *vmtx;
	kmutex_t *pmtx;
	unsigned int refid;

	ASSERT((pp != NULL) && PAGE_LOCKED(pp));

	if (VPM_MTBF(contend, contend_mtbf) && (refid = pp->p_vpmref) != 0) {
		vpm = VPMP(refid);
		vmtx = VPMAPMTX(vpm);
		mutex_enter(vmtx);
		/*
		 * Since we have the page lock and the vpm mutex, the
		 * pp->p_vpmref cannot change.
		 */
		if (vpm->vpm_pp != pp) {
			pmtx = PPMTX(pp);

			/*
			 * Clear the p_vpmref as it is incorrect.
			 * This can happen if the page was stolen.
			 * On x64 this should not happen as p_vpmref
			 * is treated as a mapping on the page. So
			 * if the page is stolen, the mapping would have
			 * been cleared in page_unload().
			 */
			mutex_enter(pmtx);
			if (pp->p_vpmref == refid)
				pp->p_vpmref = 0;
			mutex_exit(pmtx);

			mutex_exit(vmtx);
			vpm = NULL;
		} else if (vpm->vpm_refcnt == 0) {
			/*
			 * Got the vpm, remove it from the free
			 * list if it exists there.
			 */
			VPMAP_RMFREELIST(vpm);
		}
	}
	if (vpm == NULL) {
		/*
		 * get_free_vpmap() returns with the vpmap mutex held.
		 */
		vpm = get_free_vpmap(pp);
		vmtx = VPMAPMTX(vpm);
		vpmd_cpu[CPU->cpu_seqid].vcpu.vcpu_misses++;
	} else {
		vpmd_cpu[CPU->cpu_seqid].vcpu.vcpu_hits++;
	}

	vpm->vpm_refcnt++;
	mutex_exit(vmtx);

	return (vpm);
}

/* END --- vpm cache ---- */

/*
 * The vnode page mapping (vpm) interface routines.
 */
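
/*
 * Illustrative usage sketch (not part of the original interfaces): one way
 * a file system read path could use vpm to copy file data to a uio.  The
 * function name and the VPM_USAGE_EXAMPLE guard are hypothetical and the
 * block is never compiled; it only shows how the routines below fit
 * together.  Callers are expected to check vpm_enable and fall back to the
 * segmap interfaces when it is zero, and len is assumed to be at most
 * MAXBSIZE (see vpm_map_pages()).
 */
#ifdef	VPM_USAGE_EXAMPLE
static int
vpm_example_read(struct vnode *vp, u_offset_t off, size_t len,
    struct uio *uio)
{
	if (!vpm_enable)
		return (ENOTSUP);	/* caller would use segmap instead */

	/*
	 * fetchpage != 0 lets VOP_GETPAGE() bring the pages in; zerostart
	 * only matters when new pages are created on a write.
	 */
	return (vpm_data_copy(vp, off, len, uio, 1, NULL, 0, S_READ));
}
#endif	/* VPM_USAGE_EXAMPLE */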

/*
 * Find or create the pages starting from baseoff for the specified
 * length 'len'.
 */
static int
vpm_pagecreate(
	struct vnode *vp,
	u_offset_t baseoff,
	size_t len,
	vmap_t vml[],
	int nseg,
	int *newpage)
{

	page_t *pp = NULL;
	caddr_t base;
	u_offset_t off = baseoff;
	int i;
	ASSERT(nseg >= MINVMAPS && nseg < MAXVMAPS);

	for (i = 0; len > 0; len -= MIN(len, PAGESIZE), i++) {
		struct vpmap *vpm;


		if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL) {

			base = segkpm_create_va(off);

			/*
			 * The seg pointer passed in is just advisory. Just
			 * pass segkmap for now like segmap does with
			 * segmap_kpm enabled.
			 */
			if ((pp = page_create_va(vp, off, PAGESIZE, PG_WAIT,
			    segkmap, base)) == NULL) {
				panic("segmap_pagecreate_vpm: "
				    "page_create failed");
				/*NOTREACHED*/
			}
			if (newpage != NULL)
				*newpage = 1;

			page_io_unlock(pp);
		}

		/*
		 * Get the vpm for this page_t.
		 */
		if (vpm_cache_enable) {
			vpm = get_vpmap(pp);
			vml[i].vs_data = (void *)&vpm->vpm_pp;
		} else {
			vml[i].vs_data = (void *)pp;
			pp->p_vpmref = 0;
		}

		vml[i].vs_addr = hat_kpm_mapin(pp, 0);
		vml[i].vs_len = PAGESIZE;

		off += PAGESIZE;
	}
	vml[i].vs_data = NULL;
	vml[i].vs_addr = (caddr_t)NULL;
	return (0);
}


/*
 * Returns vpm mappings of pages in the range [off, off+len], where
 * len is rounded up to the PAGESIZE boundary. The list of pages and
 * the page addresses are returned in the SGL vml (vmap_t) array passed in.
 * The nseg is the number of vmap_t entries in the array.
 *
 * Currently the max len allowed is MAXBSIZE; therefore, it will either
 * fetch/create one or two pages depending on the PAGESIZE.
 *
 * The segmap's SM_LOCKPROTO usage is not supported by these interfaces.
 * For such cases, use the seg_map interfaces.
 */
int
vpm_map_pages(
	struct vnode *vp,
	u_offset_t off,
	size_t len,
	int fetchpage,
	vmap_t *vml,
	int nseg,
	int *newpage,
	enum seg_rw rw)
{
	extern struct vnode *common_specvp();
	u_offset_t baseoff;
	uint_t prot;
	caddr_t base;
	page_t *pp, *pplist[MAXVMAPS];
	struct vpmap *vpm;
	int i, error = 0;

	ASSERT(nseg >= MINVMAPS && nseg < MAXVMAPS);
	baseoff = off & (offset_t)PAGEMASK;
	vml[0].vs_data = NULL;
	vml[0].vs_addr = (caddr_t)NULL;
	/*
	 * For now, let's restrict it to MAXBSIZE. XXX - We can allow
	 * len longer than MAXBSIZE, but there should be a limit
	 * which should be determined by how many pages the VOP_GETPAGE()
	 * can fetch.
	 */
	if (off + len > baseoff + MAXBSIZE) {
		panic("vpm_map_pages bad len");
		/*NOTREACHED*/
	}

	/*
	 * If this is a block device we have to be sure to use the
	 * "common" block device vnode for the mapping.
	 */
	if (vp->v_type == VBLK)
		vp = common_specvp(vp);


	if (!fetchpage)
		return (vpm_pagecreate(vp, baseoff, len, vml, nseg, newpage));

	for (i = 0; len > 0; len -= MIN(len, PAGESIZE), i++,
	    pplist[i] = NULL) {

		pp = page_lookup(vp, baseoff, SE_SHARED);

		/*
		 * If we did not find the page or if this page was not
		 * in our cache, then let VOP_GETPAGE get all the pages.
		 * We need to call VOP_GETPAGE so that file systems can do
		 * some (un)necessary tracking for sequential access.
		 */

		if (pp == NULL || (vpm_cache_enable && pp->p_vpmref == 0) ||
		    (rw == S_WRITE && hat_page_getattr(pp, P_MOD | P_REF)
		    != (P_MOD | P_REF))) {
			if (pp != NULL) {
				page_unlock(pp);
			}

			/*
			 * Pass a dummy address as it will be required
			 * by page_create_va(). We pass segkmap as the seg
			 * as some file systems (UFS) check it.
			 */
			base = segkpm_create_va(baseoff);

			error = VOP_GETPAGE(vp, baseoff, len, &prot, &pplist[i],
			    roundup(len, PAGESIZE), segkmap, base, rw, CRED());
			if (error) {
				VPM_DEBUG(vpmd_getpagefailed);
				pplist[i] = NULL;
			}
			break;
		} else {
			pplist[i] = pp;
			baseoff += PAGESIZE;
		}
	}

	if (error) {
		for (i = 0; pplist[i] != NULL; i++) {
			page_unlock(pplist[i]);
			pplist[i] = NULL;
		}
		vml[0].vs_addr = NULL;
		vml[0].vs_data = NULL;
		return (FC_MAKE_ERR(error));
	}

	/*
	 * Get the vpm's for pages.
	 */
	for (i = 0; pplist[i] != NULL; i++) {
		if (vpm_cache_enable) {
			vpm = get_vpmap(pplist[i]);
			vml[i].vs_data = (void *)&(vpm->vpm_pp);
		} else {
			vml[i].vs_data = (void *)pplist[i];
			pplist[i]->p_vpmref = 0;
		}

		vml[i].vs_addr = hat_kpm_mapin(pplist[i], 0);
		vml[i].vs_len = PAGESIZE;
	}

	vml[i].vs_data = NULL;
	vml[i].vs_addr = (caddr_t)NULL;

	return (0);
}

/*
 * Release the vpm mappings on the pages and unlock them.
 */
void
vpm_unmap_pages(vmap_t vml[], enum seg_rw rw)
{
	int i;
	struct vpmap *vpm;
	kmutex_t *mtx;
	page_t *pp;

	for (i = 0; vml[i].vs_data != NULL; i++) {
		ASSERT(IS_KPM_ADDR(vml[i].vs_addr));

		if (vpm_cache_enable) {
			pp = *(((page_t **)vml[i].vs_data));
		} else {
			pp = (page_t *)vml[i].vs_data;
		}

		/*
		 * Mark the page as modified or referenced, because vpm pages
		 * would not cause the faults where these bits would normally
		 * be set.
		 */
		if (rw == S_WRITE) {
			hat_setrefmod(pp);
		} else {
			ASSERT(rw == S_READ);
			hat_setref(pp);
		}

		if (vpm_cache_enable) {
			page_unlock(pp);
			vpm = (struct vpmap *)((char *)vml[i].vs_data
			    - offsetof(struct vpmap, vpm_pp));
			mtx = VPMAPMTX(vpm);
			mutex_enter(mtx);

			if (--vpm->vpm_refcnt == 0) {
				free_vpmap(vpm);
			}
			mutex_exit(mtx);
		} else {
			hat_kpm_mapout(pp, 0, vml[i].vs_addr);
			(void) page_release(pp, 1);
		}
		vml[i].vs_data = NULL;
		vml[i].vs_addr = NULL;
	}
}

/*
 * Given the vp, off and the uio structure, this routine will do the
 * copy (uiomove). If the last page created is partially written, the
 * rest of the page is zeroed out. It also zeroes the beginning of the
 * first page up to the start offset if requested (zerostart).
 * If pages are to be fetched, it will call the filesystem's getpage
 * function (VOP_GETPAGE) to get them, otherwise they will be created if
 * not already present in the page cache.
 */
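/*
 * Worked example (illustrative only, assuming a 4K PAGESIZE): a 300 byte
 * write at file offset 100 into a newly created page copies bytes 100..399
 * of the page; the tail, bytes 400..4095, is then zeroed, and bytes 0..99
 * are zeroed as well when zerostart is set.
 */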
int
vpm_data_copy(struct vnode *vp,
	u_offset_t off,
	size_t len,
	struct uio *uio,
	int fetchpage,
	int *newpage,
	int zerostart,
	enum seg_rw rw)
{
	int error;
	struct vmap vml[MINVMAPS];
	enum uio_rw uiorw;
	int npages = 0;

	uiorw = (rw == S_WRITE) ? UIO_WRITE : UIO_READ;
	/*
	 * 'off' will be the offset where the I/O starts.
	 * We get the pages starting at the (off & PAGEMASK)
	 * page boundary.
	 */
	error = vpm_map_pages(vp, off, (uint_t)len,
	    fetchpage, vml, MINVMAPS, &npages, rw);

	if (newpage != NULL)
		*newpage = npages;
	if (!error) {
		int i, pn, slen = len;
		int pon = off & PAGEOFFSET;

		/*
		 * Clear from the beginning of the page to the start offset
		 * if requested.
		 */
		if (!fetchpage && zerostart) {
			(void) kzero(vml[0].vs_addr, (uint_t)pon);
			VPM_DEBUG(vpmd_zerostart);
		}

		for (i = 0; !error && slen > 0 &&
		    vml[i].vs_addr != NULL; i++) {
			pn = (int)MIN(slen, (PAGESIZE - pon));
			error = uiomove(vml[i].vs_addr + pon,
			    (long)pn, uiorw, uio);
			slen -= pn;
			pon = 0;
		}

		/*
		 * When new pages are created, zero out part of the
		 * page we did not copy to.
		 */
		if (!fetchpage && npages &&
		    uio->uio_loffset < roundup(off + len, PAGESIZE)) {
			int nzero;

			pon = (uio->uio_loffset & PAGEOFFSET);
			nzero = PAGESIZE - pon;
			i = (uio->uio_loffset - (off & PAGEMASK)) / PAGESIZE;
			(void) kzero(vml[i].vs_addr + pon, (uint_t)nzero);
		}
		vpm_unmap_pages(vml, rw);
	}
	return (error);
}

/*
 * Called to flush pages for the given vnode covering the
 * [off, off+len] range.
 */
int
vpm_sync_pages(struct vnode *vp,
	u_offset_t off,
	size_t len,
	uint_t flags)
{
	extern struct vnode *common_specvp();
	int bflags = 0;
	int error = 0;
	size_t psize = roundup(len, PAGESIZE);

	/*
	 * If this is a block device we have to be sure to use the
	 * "common" block device vnode for the mapping.
	 */
	if (vp->v_type == VBLK)
		vp = common_specvp(vp);

	if ((flags & ~SM_DONTNEED) != 0) {
		if (flags & SM_ASYNC)
			bflags |= B_ASYNC;
		if (flags & SM_INVAL)
			bflags |= B_INVAL;
		if (flags & SM_DESTROY)
			bflags |= (B_INVAL|B_TRUNC);
		if (flags & SM_FREE)
			bflags |= B_FREE;
		if (flags & SM_DONTNEED)
			bflags |= B_DONTNEED;

		error = VOP_PUTPAGE(vp, off, psize, bflags, CRED());
	}

	return (error);
}


#else	/* SEGKPM_SUPPORT */

/* vpm stubs */
void
vpm_init()
{
}

/*ARGSUSED*/
int
vpm_pagecreate(
	struct vnode *vp,
	u_offset_t baseoff,
	size_t len,
	vmap_t vml[],
	int nseg,
	int *newpage)
{
	return (0);
}

/*ARGSUSED*/
int
vpm_map_pages(
	struct vnode *vp,
	u_offset_t off,
	size_t len,
	int fetchpage,
	vmap_t vml[],
	int nseg,
	int *newpage,
	enum seg_rw rw)
{
	return (0);
}

/*ARGSUSED*/
int
vpm_data_copy(struct vnode *vp,
	u_offset_t off,
	size_t len,
	struct uio *uio,
	int fetchpage,
	int *newpage,
	int zerostart,
	enum seg_rw rw)
{
	return (0);
}

/*ARGSUSED*/
void
vpm_unmap_pages(vmap_t vml[], enum seg_rw rw)
{
}

/*ARGSUSED*/
int
vpm_sync_pages(struct vnode *vp,
	u_offset_t off,
	size_t len,
	uint_t flags)
{
	return (0);
}
#endif	/* SEGKPM_SUPPORT */