11841Spraks /* 21841Spraks * CDDL HEADER START 31841Spraks * 41841Spraks * The contents of this file are subject to the terms of the 51841Spraks * Common Development and Distribution License (the "License"). 61841Spraks * You may not use this file except in compliance with the License. 71841Spraks * 81841Spraks * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 91841Spraks * or http://www.opensolaris.org/os/licensing. 101841Spraks * See the License for the specific language governing permissions 111841Spraks * and limitations under the License. 121841Spraks * 131841Spraks * When distributing Covered Code, include this CDDL HEADER in each 141841Spraks * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 151841Spraks * If applicable, add the following below this CDDL HEADER, with the 161841Spraks * fields enclosed by brackets "[]" replaced with your own identifying 171841Spraks * information: Portions Copyright [yyyy] [name of copyright owner] 181841Spraks * 191841Spraks * CDDL HEADER END 201841Spraks */ 211841Spraks /* 22*9281SPrakash.Sangappa@Sun.COM * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 231841Spraks * Use is subject to license terms. 241841Spraks */ 251841Spraks 261841Spraks #ifndef _VM_VPM_H 271841Spraks #define _VM_VPM_H 281841Spraks 291841Spraks 301841Spraks #ifdef __cplusplus 311841Spraks extern "C" { 321841Spraks #endif 331841Spraks 341841Spraks /* 351841Spraks * The vnode page mappings(VPM) interfaces. 361841Spraks * "Commitment level - Consolidation private". They are subject 371841Spraks * to change without notice. Use them at your own risk. 381841Spraks * 391841Spraks * At this stage these interfaces are provided only to utilize the 40*9281SPrakash.Sangappa@Sun.COM * segkpm mappings. Therefore these interfaces have to be used under 41*9281SPrakash.Sangappa@Sun.COM * the 'vpm_enable' check as an alternative to segmap interfaces where 42*9281SPrakash.Sangappa@Sun.COM * applicable. 431841Spraks * 441841Spraks * The VPM interfaces provide temporary mappings to file pages. They 451841Spraks * return the mappings in a scatter gather list(SGL). 461841Spraks * The SGL elements are the structure 'vmap_t'. 471841Spraks * 481841Spraks * typedef struct vmap { 49*9281SPrakash.Sangappa@Sun.COM * caddr_t vs_addr; / public - mapped address / 50*9281SPrakash.Sangappa@Sun.COM * size_t vs_len; / public - length of mapping / 511841Spraks * void *vs_data; / opaque - private data / 521841Spraks * } vmap_t; 531841Spraks * 541841Spraks * An array of this structure has to be passed to the interface routines 551841Spraks * along with the size(# of elements) of the SGL array. Depending on the 561841Spraks * requested length and mapped chunk sizes(PAGESIZE here), the number of 571841Spraks * valid mappings returned can be less then actual size of the SGL array. 581841Spraks * Always, an element in the SGL will have 'vs_addr' set to NULL which 591841Spraks * marks the end of the valid entires in the SGL. 601841Spraks * 611841Spraks * The vmap_t structure members are populated with the mapped address 621841Spraks * in 'vs_addr' and length of the mapping in 'vs_len'. Currently the 631841Spraks * mapping length is fixed at PAGESIZE. The 'vs_data' member is private 641841Spraks * and the caller should not access or modify it. 651841Spraks * 661841Spraks * Using a scatter gather list to return the mappings and length makes it 67*9281SPrakash.Sangappa@Sun.COM * possible to provide mappings of variable length. Mapping length upto 68*9281SPrakash.Sangappa@Sun.COM * VPMMAXLEN is supported. The scatter gather list array size needs to 69*9281SPrakash.Sangappa@Sun.COM * be a minimum of MINVMAPS elements. 701841Spraks * 711841Spraks * Interfaces: 721841Spraks * 731841Spraks * int vpm_map_pages( struct vnode *vp, u_offset_t off, size_t len, 741841Spraks * int fetchpage, vmap_t *vml, int vmlsz, 751841Spraks * int *newpagecreated, enum seg_rw rw); 761841Spraks * 771841Spraks * This function returns mappings to vnode pages. 781841Spraks * 791841Spraks * It takes a vnode, offset and length and returns mappings to the pages 80*9281SPrakash.Sangappa@Sun.COM * covering the range [off, off + len) in the vmap_t SGL array 'vml'. 81*9281SPrakash.Sangappa@Sun.COM * The length passed in should satisfy the following criteria 82*9281SPrakash.Sangappa@Sun.COM * '(off + len) <= ((off & PAGEMASK) + VPMMAXLEN)' 83*9281SPrakash.Sangappa@Sun.COM * The mapped address returned, in 'vs_addr', of first vml[] entry 84*9281SPrakash.Sangappa@Sun.COM * is at begining of page containing 'off'. 851841Spraks * 861841Spraks * The 'vmlsz' is the size(# elements) of the 'vml' array. 871841Spraks * 881841Spraks * When the 'fetchpage' flag is set, the vnode(file) pages will be fetched 891841Spraks * (calls VOP_GETPAGE) from the backing store(disk) if not found in the 901841Spraks * system page cache. If 'fetchpage == 0', the vnode(file) pages for the 911841Spraks * given offset will be just created if they are not already present in the 921841Spraks * system page cache. The 'newpagecreated' flag is set on return if new pages 931841Spraks * are created when 'fetchpage == 0'(requested to just create new pages). 941841Spraks * 951841Spraks * The 'seg_rw rw' indicates the intended operation on these mappings 961841Spraks * (S_WRITE or S_READ). 971841Spraks * 98*9281SPrakash.Sangappa@Sun.COM * Currently these interfaces only return segkpm mappings. The vnode pages 99*9281SPrakash.Sangappa@Sun.COM * that are being accessed will be locked(at least SHARED locked) for the 100*9281SPrakash.Sangappa@Sun.COM * duration these mappings are in use. After use, the unmap function, 101*9281SPrakash.Sangappa@Sun.COM * vpm_unmap_pages(), has to be called and the same SGL array 1021841Spraks * needs to be passed to the unmap function. 1031841Spraks * 1041841Spraks * 1051841Spraks * void vpm_unmap_pages(vpmap_t *vml, enum seg_rw rw);. 1061841Spraks * 1071841Spraks * This function unmaps the pages that where mapped by vpm_map_pages. 1081841Spraks * The SGL array 'vml' has to be the one that was passed to vpm_map_pages(). 1091841Spraks * 1101841Spraks * 1111841Spraks * ex: 1121841Spraks * To copy file data of vnode(file) 'vp' at offset 'off' to a kernel buffer 1131841Spraks * 'buf' the following code snippet shows how to use the above two interfaces. 1141841Spraks * Here the the copy length is till the MAXBSIZE boundary. This code can be 1151841Spraks * executed repeatedly, in a loop to copy more then MAXBSIZE length of data. 1161841Spraks * 1171841Spraks * vmap_t vml[MINVMAPS]; 1181841Spraks * int err, i, newpage, len; 1191841Spraks * int pon; 1201841Spraks * 1211841Spraks * pon = (off & PAGEOFFSET); 1221841Spraks * len = MAXBSIZE - pon; 1231841Spraks * 1241841Spraks * if (vpm_enable) { 1251841Spraks * err = vpm_map_pages(vp, off, len, 0, vml, MINVMAPS, 1261841Spraks * &newpage, S_WRITE); 1271841Spraks * 1281841Spraks * if (err) 1291841Spraks * return; 1301841Spraks * 1311841Spraks * for (i=0; vml[i].vs_addr != NULL); i++) { 1321841Spraks * bcopy (buf, vml[i].vs_addr + pon, 1331841Spraks * PAGESIZE - pon); 1341841Spraks * buf += (PAGESIZE - pon); 1351841Spraks * pon = 0; 1361841Spraks * } 1371841Spraks * 1381841Spraks * if (newpage) { 1391841Spraks * pon = (off & PAGEOFFSET); 1401841Spraks * bzero(vml[i-1].vs_addr + pon, PAGESIZE - pon); 1411841Spraks * } 1421841Spraks * 1431841Spraks * vpm_unmap_pages(vml, S_WRITE); 1441841Spraks * } 1451841Spraks * 1461841Spraks * 1471841Spraks * 1481841Spraks * 1491841Spraks * int vpm_data_copy(struct vnode *vp, u_offset_t off, size_t len, 1501841Spraks * struct uio *uio, int fetchpage, int *newpagecreated, 1511841Spraks * int zerostart, enum seg_rw rw); 1521841Spraks * 1531841Spraks * This function can be called if the need is to just transfer data to/from 1541841Spraks * the vnode pages. It takes a 'uio' structure and calls 'uiomove()' to 1551841Spraks * do the data transfer. It can be used in the context of read and write 1561841Spraks * system calls to transfer data between a user buffer, which is specified 1571841Spraks * in the uio structure, and the vnode pages. If the data needs to be 1581841Spraks * transferred between a kernel buffer and the pages, like in the above 1591841Spraks * example, a uio structure can be set up accordingly and passed. The 'rw' 1601841Spraks * parameter will determine the direction of the data transfer. 1611841Spraks * 1621841Spraks * The 'fetchpage' and 'newpagecreated' are same as explained before. 1631841Spraks * The 'zerostart' flag when set will zero fill start of the page till the 1641841Spraks * offset 'off' in the first page. i.e from 'off & PAGEMASK' to 'off'. 1651841Spraks * 1661841Spraks * 1671841Spraks * int vpm_sync_pages(struct vnode *vp, u_offset_t off, 1681841Spraks * size_t len, uint_t flags) 1691841Spraks * 1701841Spraks * This function can be called to flush or sync the vnode(file) pages that 1711841Spraks * have been accessed. It will call VOP_PUTPAGE(). 1721841Spraks * 1731841Spraks * For the given vnode, off and len the pages covering the range 1741841Spraks * [off, off + len) are flushed. Currently it uses the same flags that 1751841Spraks * are used with segmap_release() interface. Refer vm/seg_map.h. 1761841Spraks * (SM_DONTNEED, SM_ASYNC, SM_FREE, SM_INVAL, SM_DESTROY) 1771841Spraks * 1781841Spraks */ 1791841Spraks 1801841Spraks 1811841Spraks /* 1821841Spraks * vpm cache related definitions. 1831841Spraks */ 1841841Spraks #define VPMAP_MINCACHE (64 * 1024 * 1024) 185*9281SPrakash.Sangappa@Sun.COM #define VPMAP_MAXCACHE (256L * 1024L * 1024L * 1024L) /* 256G */ 186*9281SPrakash.Sangappa@Sun.COM 1871841Spraks 1881841Spraks /* 1891841Spraks * vpm caching mode 1901841Spraks */ 1911841Spraks #define VPMCACHE_LRU 0 1921841Spraks #define VPMCACHE_RANDOM 1 1931841Spraks /* 1941841Spraks * Data structures to manage the cache of pages referenced by 1951841Spraks * the vpm interfaces. There is one vpmap struct per page in the cache. 1961841Spraks */ 1971841Spraks struct vpmap { 1981841Spraks kmutex_t vpm_mtx; /* protects non list fields */ 1991841Spraks struct vnode *vpm_vp; /* pointer to vnode of cached page */ 2001841Spraks struct vpmap *vpm_next; /* free list pointers */ 2011841Spraks struct vpmap *vpm_prev; 2021841Spraks u_offset_t vpm_off; /* offset of the page */ 2031841Spraks page_t *vpm_pp; /* page pointer */ 2041841Spraks ushort_t vpm_refcnt; /* Number active references */ 2051841Spraks ushort_t vpm_ndxflg; /* indicates which queue */ 2061841Spraks ushort_t vpm_free_ndx; /* freelist it belongs to */ 2071841Spraks }; 2081841Spraks 2091841Spraks /* 2101841Spraks * Multiple vpmap free lists are maintaned so that allocations 2111841Spraks * scale with cpu count. To further reduce contentions between 2121841Spraks * allocation and deallocations, each list is made up of two queues. 2131841Spraks */ 2141841Spraks #define VPM_FREEQ_PAD 64 2151841Spraks union vpm_freeq { 2161841Spraks struct { 2171841Spraks struct vpmap *vpmsq_free; 2181841Spraks kmutex_t vpmsq_mtx; 2191841Spraks } vpmfq; 2201841Spraks char vpmq_pad[VPM_FREEQ_PAD]; 2211841Spraks }; 2221841Spraks 2231841Spraks #define vpmq_free vpmfq.vpmsq_free 2241841Spraks #define vpmq_mtx vpmfq.vpmsq_mtx 2251841Spraks 2261841Spraks struct vpmfree { 2271841Spraks union vpm_freeq vpm_freeq[2]; /* alloc and release queue */ 2281841Spraks union vpm_freeq *vpm_allocq; /* current alloc queue */ 2291841Spraks union vpm_freeq *vpm_releq; /* current release queue */ 2301841Spraks kcondvar_t vpm_free_cv; 2311841Spraks ushort_t vpm_want; 2321841Spraks }; 2331841Spraks 2341841Spraks #define VPMALLOCQ 0 2351841Spraks #define VPMRELEQ 1 2361841Spraks 2371841Spraks /* 2381841Spraks * VPM Interface definitions. 2391841Spraks */ 2401841Spraks 2411841Spraks /* 2421841Spraks * This structure is the scatter gather list element. The page 2431841Spraks * mappings will be returned in this structure. A pointer to an 2441841Spraks * array of this structure is passed to the interface routines. 2451841Spraks */ 2461841Spraks typedef struct vmap { 2471841Spraks caddr_t vs_addr; /* mapped address */ 2481841Spraks size_t vs_len; /* length, currently fixed at PAGESIZE */ 2491841Spraks void *vs_data; /* opaque - private data */ 2501841Spraks } vmap_t; 2511841Spraks 252*9281SPrakash.Sangappa@Sun.COM #define VPM_FETCHPAGE 0x01 /* fault in pages */ 253*9281SPrakash.Sangappa@Sun.COM 254*9281SPrakash.Sangappa@Sun.COM /* 255*9281SPrakash.Sangappa@Sun.COM * Max request length - Needs to be a multiple of 256*9281SPrakash.Sangappa@Sun.COM * 8192 (PAGESIZE on sparc) so it works properly on both 257*9281SPrakash.Sangappa@Sun.COM * x86 & sparc systems. Max set to 128k. 258*9281SPrakash.Sangappa@Sun.COM */ 259*9281SPrakash.Sangappa@Sun.COM #define VPMMAXLEN (128*1024) 260*9281SPrakash.Sangappa@Sun.COM 2611841Spraks /* 2621841Spraks * The minimum and maximum number of array elements in the scatter 2631841Spraks * gather list. 2641841Spraks */ 2651841Spraks #define MINVMAPS 3 /* ((MAXBSIZE/4096 + 1) min # mappings */ 266*9281SPrakash.Sangappa@Sun.COM #if defined(__sparc) 267*9281SPrakash.Sangappa@Sun.COM #define VPMMAXPGS (VPMMAXLEN/8192) /* Max # pages at a time */ 268*9281SPrakash.Sangappa@Sun.COM #else 269*9281SPrakash.Sangappa@Sun.COM #define VPMMAXPGS (VPMMAXLEN/4096) 270*9281SPrakash.Sangappa@Sun.COM #endif 271*9281SPrakash.Sangappa@Sun.COM #define MAXVMAPS (VPMMAXPGS + 1) /* Max # elements in the */ 272*9281SPrakash.Sangappa@Sun.COM /* scatter gather list */ 273*9281SPrakash.Sangappa@Sun.COM /* +1 element to mark the */ 274*9281SPrakash.Sangappa@Sun.COM /* end of the list of valid */ 275*9281SPrakash.Sangappa@Sun.COM /* mappings */ 2761841Spraks 2771841Spraks #ifdef _KERNEL 2781841Spraks 2791841Spraks extern int vpm_enable; 2801841Spraks /* 2811841Spraks * vpm page mapping operations. 2821841Spraks */ 2831841Spraks extern void vpm_init(void); 2841841Spraks extern int vpm_map_pages(struct vnode *, u_offset_t, size_t, int, 2851841Spraks vmap_t *, int, int *, enum seg_rw); 2861841Spraks 2871841Spraks extern void vpm_unmap_pages(vmap_t *, enum seg_rw); 2881841Spraks extern int vpm_sync_pages(struct vnode *, u_offset_t, size_t, uint_t); 2891841Spraks extern int vpm_data_copy(struct vnode *, u_offset_t, size_t, 2901841Spraks struct uio *, int, int *, int, enum seg_rw rw); 2911841Spraks #endif /* _KERNEL */ 2921841Spraks 2931841Spraks #ifdef __cplusplus 2941841Spraks } 2951841Spraks #endif 2961841Spraks 2971841Spraks #endif /* _VM_VPM_H */ 298