1*1841Spraks /* 2*1841Spraks * CDDL HEADER START 3*1841Spraks * 4*1841Spraks * The contents of this file are subject to the terms of the 5*1841Spraks * Common Development and Distribution License (the "License"). 6*1841Spraks * You may not use this file except in compliance with the License. 7*1841Spraks * 8*1841Spraks * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9*1841Spraks * or http://www.opensolaris.org/os/licensing. 10*1841Spraks * See the License for the specific language governing permissions 11*1841Spraks * and limitations under the License. 12*1841Spraks * 13*1841Spraks * When distributing Covered Code, include this CDDL HEADER in each 14*1841Spraks * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15*1841Spraks * If applicable, add the following below this CDDL HEADER, with the 16*1841Spraks * fields enclosed by brackets "[]" replaced with your own identifying 17*1841Spraks * information: Portions Copyright [yyyy] [name of copyright owner] 18*1841Spraks * 19*1841Spraks * CDDL HEADER END 20*1841Spraks */ 21*1841Spraks /* 22*1841Spraks * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23*1841Spraks * Use is subject to license terms. 24*1841Spraks */ 25*1841Spraks 26*1841Spraks #ifndef _VM_VPM_H 27*1841Spraks #define _VM_VPM_H 28*1841Spraks 29*1841Spraks #pragma ident "%Z%%M% %I% %E% SMI" 30*1841Spraks 31*1841Spraks #ifdef __cplusplus 32*1841Spraks extern "C" { 33*1841Spraks #endif 34*1841Spraks 35*1841Spraks /* 36*1841Spraks * The vnode page mappings(VPM) interfaces. 37*1841Spraks * "Commitment level - Consolidation private". They are subject 38*1841Spraks * to change without notice. Use them at your own risk. 39*1841Spraks * 40*1841Spraks * At this stage these interfaces are provided only to utilize the 41*1841Spraks * segkpm mappings and are enabled for solaris x64. Therefore these 42*1841Spraks * interfaces have to be used under the 'vpm_enable' check as an 43*1841Spraks * alternative to segmap interfaces where applicable. 44*1841Spraks * 45*1841Spraks * The VPM interfaces provide temporary mappings to file pages. They 46*1841Spraks * return the mappings in a scatter gather list(SGL). 47*1841Spraks * The SGL elements are the structure 'vmap_t'. 48*1841Spraks * 49*1841Spraks * typedef struct vmap { 50*1841Spraks * caddr_t vs_addr; / public / 51*1841Spraks * size_t vs_len; / public - Currently not used / 52*1841Spraks * void *vs_data; / opaque - private data / 53*1841Spraks * } vmap_t; 54*1841Spraks * 55*1841Spraks * An array of this structure has to be passed to the interface routines 56*1841Spraks * along with the size(# of elements) of the SGL array. Depending on the 57*1841Spraks * requested length and mapped chunk sizes(PAGESIZE here), the number of 58*1841Spraks * valid mappings returned can be less then actual size of the SGL array. 59*1841Spraks * Always, an element in the SGL will have 'vs_addr' set to NULL which 60*1841Spraks * marks the end of the valid entires in the SGL. 61*1841Spraks * 62*1841Spraks * The vmap_t structure members are populated with the mapped address 63*1841Spraks * in 'vs_addr' and length of the mapping in 'vs_len'. Currently the 64*1841Spraks * mapping length is fixed at PAGESIZE. The 'vs_data' member is private 65*1841Spraks * and the caller should not access or modify it. 66*1841Spraks * 67*1841Spraks * Using a scatter gather list to return the mappings and length makes it 68*1841Spraks * possible to provide mappings of variable length. Currently mapping length 69*1841Spraks * of only 'PAGESIZE' per vmap_t is possible. Also, similar to the segmap 70*1841Spraks * interfaces, on each request, the max length of 'MAXBSIZE' is supported 71*1841Spraks * for now. The MAXBSIZE mappings will be returned in 1 or 2 vmap_t elements 72*1841Spraks * of the SGL depending on the PAGESIZE. The scatter gather list array size 73*1841Spraks * needs to be a minimum of MINVMAPS elements to accommodate MAXBSIZE. 74*1841Spraks * The MAXBSIZE restriction exists because the filesystems are not capable 75*1841Spraks * of handling more(disk block allocations at a time) for now. 76*1841Spraks * 77*1841Spraks * 78*1841Spraks * Interfaces: 79*1841Spraks * 80*1841Spraks * int vpm_map_pages( struct vnode *vp, u_offset_t off, size_t len, 81*1841Spraks * int fetchpage, vmap_t *vml, int vmlsz, 82*1841Spraks * int *newpagecreated, enum seg_rw rw); 83*1841Spraks * 84*1841Spraks * This function returns mappings to vnode pages. 85*1841Spraks * 86*1841Spraks * It takes a vnode, offset and length and returns mappings to the pages 87*1841Spraks * covering the range [off, off +len) in the vmap_t SGL array 'vml'. 88*1841Spraks * Currently these interfaces are subject to restrictions similar to the segmap 89*1841Spraks * interfaces. The length passed in should satisfy the following criteria. 90*1841Spraks * '(off + len) <= ((off & PAGEMASK) + MAXBSIZE)' 91*1841Spraks * The mapped address returned, in 'vs_addr', are for the page boundary. 92*1841Spraks * 93*1841Spraks * The 'vmlsz' is the size(# elements) of the 'vml' array. 94*1841Spraks * 95*1841Spraks * When the 'fetchpage' flag is set, the vnode(file) pages will be fetched 96*1841Spraks * (calls VOP_GETPAGE) from the backing store(disk) if not found in the 97*1841Spraks * system page cache. If 'fetchpage == 0', the vnode(file) pages for the 98*1841Spraks * given offset will be just created if they are not already present in the 99*1841Spraks * system page cache. The 'newpagecreated' flag is set on return if new pages 100*1841Spraks * are created when 'fetchpage == 0'(requested to just create new pages). 101*1841Spraks * 102*1841Spraks * The 'seg_rw rw' indicates the intended operation on these mappings 103*1841Spraks * (S_WRITE or S_READ). 104*1841Spraks * 105*1841Spraks * Currently these interfaces only return segkpm mappings. Therefore the 106*1841Spraks * vnode pages that are being accessed will be locked(at least SHARED locked) 107*1841Spraks * for the duration these mappings are in use. After use, the unmap 108*1841Spraks * function, vpm_unmap_pages(), has to be called and the same SGL array 109*1841Spraks * needs to be passed to the unmap function. 110*1841Spraks * 111*1841Spraks * 112*1841Spraks * void vpm_unmap_pages(vpmap_t *vml, enum seg_rw rw);. 113*1841Spraks * 114*1841Spraks * This function unmaps the pages that where mapped by vpm_map_pages. 115*1841Spraks * The SGL array 'vml' has to be the one that was passed to vpm_map_pages(). 116*1841Spraks * 117*1841Spraks * 118*1841Spraks * ex: 119*1841Spraks * To copy file data of vnode(file) 'vp' at offset 'off' to a kernel buffer 120*1841Spraks * 'buf' the following code snippet shows how to use the above two interfaces. 121*1841Spraks * Here the the copy length is till the MAXBSIZE boundary. This code can be 122*1841Spraks * executed repeatedly, in a loop to copy more then MAXBSIZE length of data. 123*1841Spraks * 124*1841Spraks * vmap_t vml[MINVMAPS]; 125*1841Spraks * int err, i, newpage, len; 126*1841Spraks * int pon; 127*1841Spraks * 128*1841Spraks * pon = (off & PAGEOFFSET); 129*1841Spraks * len = MAXBSIZE - pon; 130*1841Spraks * 131*1841Spraks * if (vpm_enable) { 132*1841Spraks * err = vpm_map_pages(vp, off, len, 0, vml, MINVMAPS, 133*1841Spraks * &newpage, S_WRITE); 134*1841Spraks * 135*1841Spraks * if (err) 136*1841Spraks * return; 137*1841Spraks * 138*1841Spraks * for (i=0; vml[i].vs_addr != NULL); i++) { 139*1841Spraks * bcopy (buf, vml[i].vs_addr + pon, 140*1841Spraks * PAGESIZE - pon); 141*1841Spraks * buf += (PAGESIZE - pon); 142*1841Spraks * pon = 0; 143*1841Spraks * } 144*1841Spraks * 145*1841Spraks * if (newpage) { 146*1841Spraks * pon = (off & PAGEOFFSET); 147*1841Spraks * bzero(vml[i-1].vs_addr + pon, PAGESIZE - pon); 148*1841Spraks * } 149*1841Spraks * 150*1841Spraks * vpm_unmap_pages(vml, S_WRITE); 151*1841Spraks * } 152*1841Spraks * 153*1841Spraks * 154*1841Spraks * 155*1841Spraks * 156*1841Spraks * int vpm_data_copy(struct vnode *vp, u_offset_t off, size_t len, 157*1841Spraks * struct uio *uio, int fetchpage, int *newpagecreated, 158*1841Spraks * int zerostart, enum seg_rw rw); 159*1841Spraks * 160*1841Spraks * This function can be called if the need is to just transfer data to/from 161*1841Spraks * the vnode pages. It takes a 'uio' structure and calls 'uiomove()' to 162*1841Spraks * do the data transfer. It can be used in the context of read and write 163*1841Spraks * system calls to transfer data between a user buffer, which is specified 164*1841Spraks * in the uio structure, and the vnode pages. If the data needs to be 165*1841Spraks * transferred between a kernel buffer and the pages, like in the above 166*1841Spraks * example, a uio structure can be set up accordingly and passed. The 'rw' 167*1841Spraks * parameter will determine the direction of the data transfer. 168*1841Spraks * 169*1841Spraks * The 'fetchpage' and 'newpagecreated' are same as explained before. 170*1841Spraks * The 'zerostart' flag when set will zero fill start of the page till the 171*1841Spraks * offset 'off' in the first page. i.e from 'off & PAGEMASK' to 'off'. 172*1841Spraks * Here too the MAXBSIZE restriction mentioned above applies to the length 173*1841Spraks * requested. 174*1841Spraks * 175*1841Spraks * 176*1841Spraks * int vpm_sync_pages(struct vnode *vp, u_offset_t off, 177*1841Spraks * size_t len, uint_t flags) 178*1841Spraks * 179*1841Spraks * This function can be called to flush or sync the vnode(file) pages that 180*1841Spraks * have been accessed. It will call VOP_PUTPAGE(). 181*1841Spraks * 182*1841Spraks * For the given vnode, off and len the pages covering the range 183*1841Spraks * [off, off + len) are flushed. Currently it uses the same flags that 184*1841Spraks * are used with segmap_release() interface. Refer vm/seg_map.h. 185*1841Spraks * (SM_DONTNEED, SM_ASYNC, SM_FREE, SM_INVAL, SM_DESTROY) 186*1841Spraks * 187*1841Spraks */ 188*1841Spraks 189*1841Spraks 190*1841Spraks /* 191*1841Spraks * vpm cache related definitions. 192*1841Spraks */ 193*1841Spraks #define VPMAP_MINCACHE (64 * 1024 * 1024) 194*1841Spraks 195*1841Spraks /* 196*1841Spraks * vpm caching mode 197*1841Spraks */ 198*1841Spraks #define VPMCACHE_LRU 0 199*1841Spraks #define VPMCACHE_RANDOM 1 200*1841Spraks /* 201*1841Spraks * Data structures to manage the cache of pages referenced by 202*1841Spraks * the vpm interfaces. There is one vpmap struct per page in the cache. 203*1841Spraks */ 204*1841Spraks struct vpmap { 205*1841Spraks kmutex_t vpm_mtx; /* protects non list fields */ 206*1841Spraks struct vnode *vpm_vp; /* pointer to vnode of cached page */ 207*1841Spraks struct vpmap *vpm_next; /* free list pointers */ 208*1841Spraks struct vpmap *vpm_prev; 209*1841Spraks u_offset_t vpm_off; /* offset of the page */ 210*1841Spraks page_t *vpm_pp; /* page pointer */ 211*1841Spraks ushort_t vpm_refcnt; /* Number active references */ 212*1841Spraks ushort_t vpm_ndxflg; /* indicates which queue */ 213*1841Spraks ushort_t vpm_free_ndx; /* freelist it belongs to */ 214*1841Spraks }; 215*1841Spraks 216*1841Spraks /* 217*1841Spraks * Multiple vpmap free lists are maintaned so that allocations 218*1841Spraks * scale with cpu count. To further reduce contentions between 219*1841Spraks * allocation and deallocations, each list is made up of two queues. 220*1841Spraks */ 221*1841Spraks #define VPM_FREEQ_PAD 64 222*1841Spraks union vpm_freeq { 223*1841Spraks struct { 224*1841Spraks struct vpmap *vpmsq_free; 225*1841Spraks kmutex_t vpmsq_mtx; 226*1841Spraks } vpmfq; 227*1841Spraks char vpmq_pad[VPM_FREEQ_PAD]; 228*1841Spraks }; 229*1841Spraks 230*1841Spraks #define vpmq_free vpmfq.vpmsq_free 231*1841Spraks #define vpmq_mtx vpmfq.vpmsq_mtx 232*1841Spraks 233*1841Spraks struct vpmfree { 234*1841Spraks union vpm_freeq vpm_freeq[2]; /* alloc and release queue */ 235*1841Spraks union vpm_freeq *vpm_allocq; /* current alloc queue */ 236*1841Spraks union vpm_freeq *vpm_releq; /* current release queue */ 237*1841Spraks kcondvar_t vpm_free_cv; 238*1841Spraks ushort_t vpm_want; 239*1841Spraks }; 240*1841Spraks 241*1841Spraks #define VPMALLOCQ 0 242*1841Spraks #define VPMRELEQ 1 243*1841Spraks 244*1841Spraks /* 245*1841Spraks * VPM Interface definitions. 246*1841Spraks */ 247*1841Spraks 248*1841Spraks /* 249*1841Spraks * This structure is the scatter gather list element. The page 250*1841Spraks * mappings will be returned in this structure. A pointer to an 251*1841Spraks * array of this structure is passed to the interface routines. 252*1841Spraks */ 253*1841Spraks typedef struct vmap { 254*1841Spraks caddr_t vs_addr; /* mapped address */ 255*1841Spraks size_t vs_len; /* length, currently fixed at PAGESIZE */ 256*1841Spraks void *vs_data; /* opaque - private data */ 257*1841Spraks } vmap_t; 258*1841Spraks 259*1841Spraks /* 260*1841Spraks * The minimum and maximum number of array elements in the scatter 261*1841Spraks * gather list. 262*1841Spraks */ 263*1841Spraks #define MINVMAPS 3 /* ((MAXBSIZE/4096 + 1) min # mappings */ 264*1841Spraks #define MAXVMAPS 10 /* Max # the scatter gather list */ 265*1841Spraks 266*1841Spraks #ifdef _KERNEL 267*1841Spraks 268*1841Spraks extern int vpm_enable; 269*1841Spraks /* 270*1841Spraks * vpm page mapping operations. 271*1841Spraks */ 272*1841Spraks extern void vpm_init(void); 273*1841Spraks extern int vpm_map_pages(struct vnode *, u_offset_t, size_t, int, 274*1841Spraks vmap_t *, int, int *, enum seg_rw); 275*1841Spraks 276*1841Spraks extern void vpm_unmap_pages(vmap_t *, enum seg_rw); 277*1841Spraks extern int vpm_sync_pages(struct vnode *, u_offset_t, size_t, uint_t); 278*1841Spraks extern int vpm_data_copy(struct vnode *, u_offset_t, size_t, 279*1841Spraks struct uio *, int, int *, int, enum seg_rw rw); 280*1841Spraks #endif /* _KERNEL */ 281*1841Spraks 282*1841Spraks #ifdef __cplusplus 283*1841Spraks } 284*1841Spraks #endif 285*1841Spraks 286*1841Spraks #endif /* _VM_VPM_H */ 287