xref: /onnv-gate/usr/src/uts/common/vm/vpm.h (revision 9281:67a3435ea50e)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

261841Spraks #ifndef	_VM_VPM_H
271841Spraks #define	_VM_VPM_H
281841Spraks 
291841Spraks 
301841Spraks #ifdef	__cplusplus
311841Spraks extern "C" {
321841Spraks #endif
331841Spraks 
/*
 * The vnode page mappings(VPM) interfaces.
 * "Commitment level - Consolidation private". They are subject
 * to change without notice. Use them at your own risk.
 *
 * At this stage these interfaces are provided only to utilize the
 * segkpm mappings. Therefore these interfaces have to be used under
 * the 'vpm_enable' check as an alternative to segmap interfaces where
 * applicable.
 *
 * The VPM interfaces provide temporary mappings to file pages. They
 * return the mappings in a scatter gather list(SGL).
 * The SGL elements are the structure 'vmap_t'.
 *
 *	typedef struct vmap {
 *		caddr_t	vs_addr;	/ public - mapped address /
 *		size_t	vs_len;		/ public - length of mapping /
 *		void	*vs_data;	/ opaque - private data /
 *	} vmap_t;
 *
 * An array of this structure has to be passed to the interface routines
 * along with the size(# of elements) of the SGL array. Depending on the
 * requested length and mapped chunk sizes(PAGESIZE here), the number of
 * valid mappings returned can be less than the actual size of the SGL
 * array. There is always an element in the SGL with 'vs_addr' set to
 * NULL, which marks the end of the valid entries in the SGL.
 *
 * The vmap_t structure members are populated with the mapped address
 * in 'vs_addr' and length of the mapping in 'vs_len'. Currently the
 * mapping length is fixed at PAGESIZE. The 'vs_data' member is private
 * and the caller should not access or modify it.
 *
 * Using a scatter gather list to return the mappings and length makes it
 * possible to provide mappings of variable length. Mapping length up to
 * VPMMAXLEN is supported.  The scatter gather list array size needs to
 * be a minimum of MINVMAPS elements.
 *
 * Interfaces:
 *
 * int vpm_map_pages(struct vnode *vp, u_offset_t off, size_t len,
 *			int fetchpage, vmap_t *vml, int vmlsz,
 *			int *newpagecreated, enum seg_rw rw);
 *
 * This function returns mappings to vnode pages.
 *
 * It takes a vnode, offset and length and returns mappings to the pages
 * covering the range [off, off + len) in the vmap_t SGL array 'vml'.
 * The length passed in should satisfy the following criteria
 * '(off + len)  <= ((off & PAGEMASK) + VPMMAXLEN)'
 * The mapped address returned, in 'vs_addr', of the first vml[] entry
 * is at the beginning of the page containing 'off'.
 *
 * The 'vmlsz' is the size(# elements) of the 'vml' array.
 *
 * When the 'fetchpage' flag is set, the vnode(file) pages will be fetched
 * (calls VOP_GETPAGE) from the backing store(disk) if not found in the
 * system page cache. If 'fetchpage == 0', the vnode(file) pages for the
 * given offset will be just created if they are not already present in the
 * system page cache. The 'newpagecreated' flag is set on return if new pages
 * are created when 'fetchpage == 0'(requested to just create new pages).
 *
 * The 'seg_rw rw' indicates the intended operation on these mappings
 * (S_WRITE or S_READ).
 *
 * Currently these interfaces only return segkpm mappings. The vnode pages
 * that are being accessed will be locked(at least SHARED locked) for the
 * duration these mappings are in use. After use, the unmap function,
 * vpm_unmap_pages(), has to be called and the same SGL array
 * needs to be passed to the unmap function.
 *
 *
 * void vpm_unmap_pages(vmap_t *vml, enum seg_rw rw);
 *
 * This function unmaps the pages that were mapped by vpm_map_pages().
 * The SGL array 'vml' has to be the one that was passed to vpm_map_pages().
 *
 *
 * ex:
 * To copy data from a kernel buffer 'buf' into the file pages of
 * vnode(file) 'vp' at offset 'off', the following code snippet shows how
 * to use the above two interfaces. Here the copy length is till the
 * MAXBSIZE boundary. This code can be executed repeatedly, in a loop, to
 * copy more than MAXBSIZE length of data.
 *
 *	vmap_t  vml[MINVMAPS];
 *	int err, i, newpage, len;
 *	int pon;
 *
 *	pon = (off & PAGEOFFSET);
 *	len = MAXBSIZE - pon;
 *
 *	if (vpm_enable) {
 *		err = vpm_map_pages(vp, off, len, 0, vml, MINVMAPS,
 *				 &newpage, S_WRITE);
 *
 *		if (err)
 *			return;
 *
 *		for (i = 0; vml[i].vs_addr != NULL; i++) {
 *			bcopy(buf, vml[i].vs_addr + pon,
 *				 PAGESIZE - pon);
 *			buf += (PAGESIZE - pon);
 *			pon = 0;
 *		}
 *
 *		if (newpage) {
 *			pon = (off & PAGEOFFSET);
 *			bzero(vml[i-1].vs_addr + pon, PAGESIZE - pon);
 *		}
 *
 *		vpm_unmap_pages(vml, S_WRITE);
 *	}
 *
 *
 *
 *
 * int vpm_data_copy(struct vnode *vp, u_offset_t off, size_t len,
 *		struct uio *uio, int fetchpage, int *newpagecreated,
 *		int zerostart, enum seg_rw rw);
 *
 * This function can be called if the need is to just transfer data to/from
 * the vnode pages. It takes a 'uio' structure and calls 'uiomove()' to
 * do the data transfer. It can be used in the context of read and write
 * system calls to transfer data between a user buffer, which is specified
 * in the uio structure, and the vnode pages. If the data needs to be
 * transferred between a kernel buffer and the pages, like in the above
 * example, a uio structure can be set up accordingly and passed. The 'rw'
 * parameter will determine the direction of the data transfer.
 *
 * The 'fetchpage' and 'newpagecreated' are same as explained before.
 * The 'zerostart' flag when set will zero fill start of the page till the
 * offset 'off' in the first page. i.e. from 'off & PAGEMASK' to 'off'.
 *
 *
 * int vpm_sync_pages(struct vnode *vp, u_offset_t off,
 *					 size_t len, uint_t flags)
 *
 * This function can be called to flush or sync the vnode(file) pages that
 * have been accessed. It will call VOP_PUTPAGE().
 *
 * For the given vnode, off and len the pages covering the range
 * [off, off + len) are flushed. Currently it uses the same flags that
 * are used with segmap_release() interface. Refer to vm/seg_map.h.
 * (SM_DONTNEED, SM_ASYNC, SM_FREE, SM_INVAL, SM_DESTROY)
 *
 */


/*
 * vpm cache related definitions.
 *
 * Both values are byte counts: 64MB and 256GB. Presumably they are the
 * lower and upper bounds applied to the vpm cache size; the code that
 * applies them is not part of this header.
 *
 * NOTE(review): VPMAP_MAXCACHE is evaluated in 'long' arithmetic and is
 * only representable when long is 64 bits wide -- confirm it is never
 * evaluated where long is 32 bits.
 */
#define	VPMAP_MINCACHE		(64 * 1024 * 1024)		/* 64M */
#define	VPMAP_MAXCACHE		(256L * 1024L * 1024L * 1024L)	/* 256G */


/*
 * vpm caching mode
 *
 * Selector values for the caching mode. The variable that holds the
 * selected mode is not declared in this header.
 */
#define	VPMCACHE_LRU		0
#define	VPMCACHE_RANDOM		1
1931841Spraks /*
1941841Spraks  * Data structures to manage the cache of pages referenced by
1951841Spraks  * the vpm interfaces. There is one vpmap struct per page in the cache.
1961841Spraks  */
1971841Spraks struct vpmap {
1981841Spraks 	kmutex_t	vpm_mtx;	/* protects non list fields */
1991841Spraks 	struct vnode	*vpm_vp;	/* pointer to vnode of cached page */
2001841Spraks 	struct vpmap	*vpm_next;	/* free list pointers */
2011841Spraks 	struct vpmap	*vpm_prev;
2021841Spraks 	u_offset_t	vpm_off;	/* offset of the page */
2031841Spraks 	page_t		*vpm_pp;	/* page pointer */
2041841Spraks 	ushort_t	vpm_refcnt;	/* Number active references */
2051841Spraks 	ushort_t	vpm_ndxflg;	/* indicates which queue */
2061841Spraks 	ushort_t	vpm_free_ndx;	/* freelist it belongs to */
2071841Spraks };
2081841Spraks 
2091841Spraks /*
2101841Spraks  * Multiple vpmap free lists are maintaned so that allocations
2111841Spraks  * scale with cpu count. To further reduce contentions between
2121841Spraks  * allocation and deallocations, each list is made up of two queues.
2131841Spraks  */
2141841Spraks #define	VPM_FREEQ_PAD	64
2151841Spraks union vpm_freeq {
2161841Spraks 	struct {
2171841Spraks 		struct vpmap	*vpmsq_free;
2181841Spraks 		kmutex_t	vpmsq_mtx;
2191841Spraks 	} vpmfq;
2201841Spraks 	char vpmq_pad[VPM_FREEQ_PAD];
2211841Spraks };
2221841Spraks 
/*
 * Shorthand for the members nested in the 'vpmfq' struct of
 * union vpm_freeq, so that they can be named as q->vpmq_free and
 * q->vpmq_mtx.
 */
#define	vpmq_free	vpmfq.vpmsq_free
#define	vpmq_mtx	vpmfq.vpmsq_mtx

2261841Spraks struct vpmfree {
2271841Spraks 	union vpm_freeq vpm_freeq[2];	/* alloc and release queue */
2281841Spraks 	union vpm_freeq *vpm_allocq;	/* current alloc queue */
2291841Spraks 	union vpm_freeq *vpm_releq;	/* current release queue */
2301841Spraks 	kcondvar_t	vpm_free_cv;
2311841Spraks 	ushort_t	vpm_want;
2321841Spraks };
2331841Spraks 
/*
 * Queue selectors.
 * NOTE(review): presumably the indices into vpmfree.vpm_freeq[] that the
 * alloc and release queues start out with -- confirm.
 */
#define	VPMALLOCQ	0
#define	VPMRELEQ	1

2371841Spraks /*
2381841Spraks  * VPM Interface definitions.
2391841Spraks  */
2401841Spraks 
2411841Spraks /*
2421841Spraks  * This structure is the scatter gather list element. The page
2431841Spraks  * mappings will be returned in this structure. A pointer to an
2441841Spraks  * array of this structure is passed to the interface routines.
2451841Spraks  */
2461841Spraks typedef struct vmap {
2471841Spraks 	caddr_t	vs_addr;	/* mapped address */
2481841Spraks 	size_t	vs_len;		/* length, currently fixed at PAGESIZE */
2491841Spraks 	void	*vs_data;	/* opaque - private data */
2501841Spraks } vmap_t;
2511841Spraks 
#define	VPM_FETCHPAGE 0x01	/* fault in pages */

/*
 * Max request length - Needs to be a multiple of
 * 8192 (PAGESIZE on sparc) so it works properly on both
 * x86 & sparc systems. Max set to 128k.
 */
#define	VPMMAXLEN	(128*1024)

/*
 * The minimum and maximum number of array elements in the scatter
 * gather list.
 *
 * VPMMAXPGS is the page count of a maximum length request: VPMMAXLEN
 * divided by 8192 on sparc and by 4096 elsewhere.
 */
#define	MINVMAPS   3		/* (MAXBSIZE/4096 + 1) min # mappings */
#if defined(__sparc)
#define	VPMMAXPGS	(VPMMAXLEN/8192)	/* Max # pages at a time */
#else
#define	VPMMAXPGS	(VPMMAXLEN/4096)
#endif
#define	MAXVMAPS	(VPMMAXPGS + 1)		/* Max # elements in the */
						/* scatter gather list */
						/* +1 element to mark the */
						/* end of the list of valid */
						/*  mappings */

#ifdef _KERNEL

/*
 * The interfaces below have to be used under the 'vpm_enable' check, as
 * an alternative to the segmap interfaces.
 */
extern int	vpm_enable;

/*
 * vpm page mapping operations.
 *
 * No #include appears in this header, so struct vnode, struct uio,
 * u_offset_t, uint_t and enum seg_rw have to be declared by headers the
 * including file pulls in first.
 */

/* NOTE(review): presumably the one-time set-up of the vpm layer. */
extern void	vpm_init(void);

/*
 * Return mappings to the vnode pages covering [off, off + len) in the
 * caller's scatter gather list.
 * Arguments, in order: vnode, offset, length, fetchpage flag, vmap_t
 * array, number of elements in that array, flag set on return when new
 * pages were created (fetchpage == 0), intended access (S_READ or
 * S_WRITE).
 * NOTE(review): the usage example treats a non-zero return as failure.
 */
extern int	vpm_map_pages(struct vnode *, u_offset_t, size_t, int,
		vmap_t *, int, int *, enum seg_rw);

/*
 * Unmap the pages mapped by vpm_map_pages(). The vmap_t array has to be
 * the one that was passed to vpm_map_pages().
 */
extern void	vpm_unmap_pages(vmap_t *, enum seg_rw);

/*
 * Flush the vnode pages covering [off, off + len). Arguments, in order:
 * vnode, offset, length, flags (the same flags as segmap_release()).
 */
extern int	vpm_sync_pages(struct vnode *, u_offset_t, size_t, uint_t);

/*
 * Transfer data between a uio and the vnode pages.
 * Arguments, in order: vnode, offset, length, uio, fetchpage flag, flag
 * set on return when new pages were created, zerostart flag (zero fill
 * the first page from its start up to the offset), transfer direction.
 */
extern int	vpm_data_copy(struct vnode *, u_offset_t, size_t,
		struct uio *, int, int *, int, enum seg_rw);
#endif	/* _KERNEL */

2931841Spraks #ifdef	__cplusplus
2941841Spraks }
2951841Spraks #endif
2961841Spraks 
2971841Spraks #endif	/* _VM_VPM_H */
298