xref: /onnv-gate/usr/src/uts/common/vm/vpm.h (revision 1841)
1*1841Spraks /*
2*1841Spraks  * CDDL HEADER START
3*1841Spraks  *
4*1841Spraks  * The contents of this file are subject to the terms of the
5*1841Spraks  * Common Development and Distribution License (the "License").
6*1841Spraks  * You may not use this file except in compliance with the License.
7*1841Spraks  *
8*1841Spraks  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*1841Spraks  * or http://www.opensolaris.org/os/licensing.
10*1841Spraks  * See the License for the specific language governing permissions
11*1841Spraks  * and limitations under the License.
12*1841Spraks  *
13*1841Spraks  * When distributing Covered Code, include this CDDL HEADER in each
14*1841Spraks  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*1841Spraks  * If applicable, add the following below this CDDL HEADER, with the
16*1841Spraks  * fields enclosed by brackets "[]" replaced with your own identifying
17*1841Spraks  * information: Portions Copyright [yyyy] [name of copyright owner]
18*1841Spraks  *
19*1841Spraks  * CDDL HEADER END
20*1841Spraks  */
21*1841Spraks /*
22*1841Spraks  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23*1841Spraks  * Use is subject to license terms.
24*1841Spraks  */
25*1841Spraks 
26*1841Spraks #ifndef	_VM_VPM_H
27*1841Spraks #define	_VM_VPM_H
28*1841Spraks 
29*1841Spraks #pragma ident	"%Z%%M%	%I%	%E% SMI"
30*1841Spraks 
31*1841Spraks #ifdef	__cplusplus
32*1841Spraks extern "C" {
33*1841Spraks #endif
34*1841Spraks 
35*1841Spraks /*
36*1841Spraks  * The vnode page mappings(VPM) interfaces.
37*1841Spraks  * "Commitment level - Consolidation private". They are subject
38*1841Spraks  * to change without notice. Use them at your own risk.
39*1841Spraks  *
40*1841Spraks  * At this stage these interfaces are provided only to utilize the
41*1841Spraks  * segkpm mappings and are enabled for solaris x64. Therefore these
42*1841Spraks  * interfaces have to be used under the 'vpm_enable' check as an
43*1841Spraks  * alternative to segmap interfaces where applicable.
44*1841Spraks  *
45*1841Spraks  * The VPM interfaces provide temporary mappings to file pages. They
46*1841Spraks  * return the mappings in a scatter gather list(SGL).
47*1841Spraks  * The SGL elements are the structure 'vmap_t'.
48*1841Spraks  *
49*1841Spraks  *	typedef struct vmap {
50*1841Spraks  *		caddr_t	vs_addr;        / public /
51*1841Spraks  *		size_t	vs_len;         / public - Currently not used /
52*1841Spraks  *		void	*vs_data;	/ opaque - private data /
53*1841Spraks  *	} vmap_t;
54*1841Spraks  *
55*1841Spraks  * An array of this structure has to be passed to the interface routines
56*1841Spraks  * along with the size(# of elements) of the SGL array. Depending on the
57*1841Spraks  * requested length and mapped chunk sizes(PAGESIZE here), the number of
58*1841Spraks  * valid mappings returned can be less than the actual size of the SGL array.
59*1841Spraks  * Always, an element in the SGL will have 'vs_addr' set to NULL which
60*1841Spraks  * marks the end of the valid entries in the SGL.
61*1841Spraks  *
62*1841Spraks  * The vmap_t structure members are populated with the mapped address
63*1841Spraks  * in 'vs_addr' and length of the mapping in 'vs_len'. Currently the
64*1841Spraks  * mapping length is fixed at PAGESIZE. The 'vs_data' member is private
65*1841Spraks  * and the caller should not access or modify it.
66*1841Spraks  *
67*1841Spraks  * Using a scatter gather list to return the mappings and length makes it
68*1841Spraks  * possible to provide mappings of variable length. Currently mapping length
69*1841Spraks  * of only 'PAGESIZE' per vmap_t is possible. Also, similar to the segmap
70*1841Spraks  * interfaces, on each request, the max length of 'MAXBSIZE' is supported
71*1841Spraks  * for now. The MAXBSIZE mappings will be returned in 1 or 2 vmap_t elements
72*1841Spraks  * of the SGL depending on the PAGESIZE. The scatter gather list array size
73*1841Spraks  * needs to be a minimum of MINVMAPS elements to accommodate MAXBSIZE.
74*1841Spraks  * The MAXBSIZE restriction exists because the filesystems are not capable
75*1841Spraks  * of handling more(disk block allocations at a time) for now.
76*1841Spraks  *
77*1841Spraks  *
78*1841Spraks  * Interfaces:
79*1841Spraks  *
80*1841Spraks  * int vpm_map_pages( struct vnode *vp, u_offset_t off, size_t len,
81*1841Spraks  *			int fetchpage, vmap_t *vml, int vmlsz,
82*1841Spraks  *			int *newpagecreated, enum seg_rw rw);
83*1841Spraks  *
84*1841Spraks  * This function returns mappings to vnode pages.
85*1841Spraks  *
86*1841Spraks  * It takes a vnode, offset and length and returns mappings to the  pages
87*1841Spraks  * covering the range [off, off +len) in the vmap_t SGL array 'vml'.
88*1841Spraks  * Currently these interfaces are subject to restrictions similar to the segmap
89*1841Spraks  * interfaces. The length passed in should satisfy the following criteria.
90*1841Spraks  * '(off + len)  <= ((off & PAGEMASK) + MAXBSIZE)'
91*1841Spraks  * The mapped addresses returned in 'vs_addr' are at page boundaries.
92*1841Spraks  *
93*1841Spraks  * The 'vmlsz' is the size(# elements) of the 'vml' array.
94*1841Spraks  *
95*1841Spraks  * When the 'fetchpage' flag is set, the vnode(file) pages will be fetched
96*1841Spraks  * (calls VOP_GETPAGE) from the backing store(disk) if not found in the
97*1841Spraks  * system page cache. If 'fetchpage == 0', the vnode(file) pages for the
98*1841Spraks  * given offset will be just created if they are not already present in the
99*1841Spraks  * system page cache. The 'newpagecreated' flag is set on return if new pages
100*1841Spraks  * are created when 'fetchpage == 0'(requested to just create new pages).
101*1841Spraks  *
102*1841Spraks  * The 'seg_rw rw' indicates the intended operation on these mappings
103*1841Spraks  * (S_WRITE or S_READ).
104*1841Spraks  *
105*1841Spraks  * Currently these interfaces only return segkpm mappings. Therefore the
106*1841Spraks  * vnode pages that are being accessed will be locked(at least SHARED locked)
107*1841Spraks  * for the duration these mappings are in use. After use, the  unmap
108*1841Spraks  * function, vpm_unmap_pages(), has to be called and the same SGL array
109*1841Spraks  * needs to be passed to the unmap function.
110*1841Spraks  *
111*1841Spraks  *
112*1841Spraks  * void vpm_unmap_pages(vmap_t *vml, enum seg_rw rw);
113*1841Spraks  *
114*1841Spraks  * This function unmaps the pages that were mapped by vpm_map_pages().
115*1841Spraks  * The SGL array 'vml' has to be the one that was passed to vpm_map_pages().
116*1841Spraks  *
117*1841Spraks  *
118*1841Spraks  * ex:
119*1841Spraks  * To copy data from a kernel buffer 'buf' to the pages of vnode(file) 'vp'
120*1841Spraks  * at offset 'off', the following code snippet shows how to use the above two
121*1841Spraks  * interfaces. Here the copy length is till the MAXBSIZE boundary. This code
122*1841Spraks  * can be executed repeatedly, in a loop, to copy more than MAXBSIZE of data.
123*1841Spraks  *
124*1841Spraks  *	vmap_t  vml[MINVMAPS];
125*1841Spraks  *	int err, i, newpage, len;
126*1841Spraks  *	int pon;
127*1841Spraks  *
128*1841Spraks  *	pon = (off & PAGEOFFSET);
129*1841Spraks  *	len = MAXBSIZE - pon;
130*1841Spraks  *
131*1841Spraks  *	if (vpm_enable) {
132*1841Spraks  *             err = vpm_map_pages(vp, off, len, 0, vml, MINVMAPS,
133*1841Spraks  *				 &newpage, S_WRITE);
134*1841Spraks  *
135*1841Spraks  *		if (err)
136*1841Spraks  *			return;
137*1841Spraks  *
138*1841Spraks  *		for (i=0; vml[i].vs_addr != NULL; i++) {
139*1841Spraks  *			bcopy (buf, vml[i].vs_addr + pon,
140*1841Spraks  *				 PAGESIZE - pon);
141*1841Spraks  *			buf += (PAGESIZE - pon);
142*1841Spraks  *			pon = 0;
143*1841Spraks  *		}
144*1841Spraks  *
145*1841Spraks  *		if (newpage) {
146*1841Spraks  *			pon = (off & PAGEOFFSET);
147*1841Spraks  *			bzero(vml[i-1].vs_addr + pon, PAGESIZE - pon);
148*1841Spraks  *		}
149*1841Spraks  *
150*1841Spraks  *		vpm_unmap_pages(vml, S_WRITE);
151*1841Spraks  *	}
152*1841Spraks  *
153*1841Spraks  *
154*1841Spraks  *
155*1841Spraks  *
156*1841Spraks  * int vpm_data_copy(struct vnode *vp, u_offset_t off, size_t len,
157*1841Spraks  *		struct uio *uio, int fetchpage, int *newpagecreated,
158*1841Spraks  *		int zerostart, enum seg_rw rw);
159*1841Spraks  *
160*1841Spraks  * This function can be called if the need is to just transfer data to/from
161*1841Spraks  * the vnode pages. It takes a 'uio' structure and  calls 'uiomove()' to
162*1841Spraks  * do the data transfer. It can be used in the context of read and write
163*1841Spraks  * system calls to transfer data between a user buffer, which is specified
164*1841Spraks  * in the uio structure, and the vnode pages. If the data needs to be
165*1841Spraks  * transferred between a kernel buffer and the pages, like in the above
166*1841Spraks  * example, a uio structure can be set up accordingly and passed. The 'rw'
167*1841Spraks  * parameter will determine the direction of the data transfer.
168*1841Spraks  *
169*1841Spraks  * The 'fetchpage' and 'newpagecreated' are same as explained before.
170*1841Spraks  * The 'zerostart' flag when set will zero fill start of the page till the
171*1841Spraks  * offset 'off' in the first page. i.e  from 'off & PAGEMASK' to 'off'.
172*1841Spraks  * Here too the MAXBSIZE restriction mentioned above applies to the length
173*1841Spraks  * requested.
174*1841Spraks  *
175*1841Spraks  *
176*1841Spraks  * int vpm_sync_pages(struct vnode *vp, u_offset_t off,
177*1841Spraks  *					 size_t len, uint_t flags)
178*1841Spraks  *
179*1841Spraks  * This function can be called to flush or sync the vnode(file) pages that
180*1841Spraks  * have been accessed. It will call VOP_PUTPAGE().
181*1841Spraks  *
182*1841Spraks  * For the given vnode, off and len the pages covering the range
183*1841Spraks  * [off, off + len) are flushed. Currently it uses the same flags that
184*1841Spraks  * are used with the segmap_release() interface. Refer to vm/seg_map.h.
185*1841Spraks  * (SM_DONTNEED, SM_ASYNC, SM_FREE, SM_INVAL, SM_DESTROY)
186*1841Spraks  *
187*1841Spraks  */
188*1841Spraks 
189*1841Spraks 
190*1841Spraks /*
191*1841Spraks  * vpm cache related definitions.
192*1841Spraks  */
193*1841Spraks #define	VPMAP_MINCACHE		(64 * 1024 * 1024)	/* 64M; presumably bytes */
194*1841Spraks 
195*1841Spraks /*
196*1841Spraks  * vpm caching mode (presumably the cache replacement policy - TODO confirm).
197*1841Spraks  */
198*1841Spraks #define	VPMCACHE_LRU		0
199*1841Spraks #define	VPMCACHE_RANDOM		1
200*1841Spraks /*
201*1841Spraks  * Data structures to manage the cache of pages referenced by
202*1841Spraks  * the vpm interfaces. There is one vpmap struct per page in the cache.
203*1841Spraks  */
204*1841Spraks struct vpmap {
205*1841Spraks 	kmutex_t	vpm_mtx;	/* protects the non-list fields */
206*1841Spraks 	struct vnode	*vpm_vp;	/* pointer to vnode of cached page */
207*1841Spraks 	struct vpmap	*vpm_next;	/* free list pointer (next entry) */
208*1841Spraks 	struct vpmap	*vpm_prev;	/* free list pointer (previous entry) */
209*1841Spraks 	u_offset_t	vpm_off;	/* offset of the page */
210*1841Spraks 	page_t		*vpm_pp;	/* page pointer */
211*1841Spraks 	ushort_t	vpm_refcnt;	/* number of active references */
212*1841Spraks 	ushort_t	vpm_ndxflg;	/* indicates which queue (presumably alloc/release) */
213*1841Spraks 	ushort_t	vpm_free_ndx;	/* free list it belongs to (presumably an index) */
214*1841Spraks };
215*1841Spraks 
216*1841Spraks /*
217*1841Spraks  * Multiple vpmap free lists are maintained so that allocations
218*1841Spraks  * scale with cpu count. To further reduce contention between
219*1841Spraks  * allocations and deallocations, each list is made up of two queues.
220*1841Spraks  */
221*1841Spraks #define	VPM_FREEQ_PAD	64	/* size of vpmq_pad, in bytes */
222*1841Spraks union vpm_freeq {
223*1841Spraks 	struct {
224*1841Spraks 		struct vpmap	*vpmsq_free;	/* presumably head of this queue */
225*1841Spraks 		kmutex_t	vpmsq_mtx;	/* presumably protects this queue */
226*1841Spraks 	} vpmfq;
227*1841Spraks 	char vpmq_pad[VPM_FREEQ_PAD];	/* makes the union >= VPM_FREEQ_PAD bytes */
228*1841Spraks };
229*1841Spraks 
230*1841Spraks #define	vpmq_free	vpmfq.vpmsq_free	/* shorthand for the vpmfq member */
231*1841Spraks #define	vpmq_mtx	vpmfq.vpmsq_mtx		/* shorthand for the vpmfq member */
232*1841Spraks 
233*1841Spraks struct vpmfree {
234*1841Spraks 	union vpm_freeq vpm_freeq[2];	/* alloc and release queue */
235*1841Spraks 	union vpm_freeq *vpm_allocq;	/* current alloc queue */
236*1841Spraks 	union vpm_freeq *vpm_releq;	/* current release queue */
237*1841Spraks 	kcondvar_t	vpm_free_cv;	/* presumably waited on for a free vpmap */
238*1841Spraks 	ushort_t	vpm_want;	/* presumably # of waiters - TODO confirm */
239*1841Spraks };
240*1841Spraks 
241*1841Spraks #define	VPMALLOCQ	0	/* presumably index into vpm_freeq[] - TODO confirm */
242*1841Spraks #define	VPMRELEQ	1	/* presumably index into vpm_freeq[] - TODO confirm */
243*1841Spraks 
244*1841Spraks /*
245*1841Spraks  * VPM Interface definitions.
246*1841Spraks  */
247*1841Spraks 
248*1841Spraks /*
249*1841Spraks  * This structure is the scatter gather list (SGL) element. The page
250*1841Spraks  * mappings will be returned in this structure. A pointer to an
251*1841Spraks  * array of this structure is passed to the interface routines.
252*1841Spraks  */
253*1841Spraks typedef struct vmap {
254*1841Spraks 	caddr_t	vs_addr;	/* mapped address; NULL ends the valid entries */
255*1841Spraks 	size_t	vs_len;		/* length, currently fixed at PAGESIZE */
256*1841Spraks 	void	*vs_data;	/* opaque - private data, callers must not touch */
257*1841Spraks } vmap_t;
258*1841Spraks 
259*1841Spraks /*
260*1841Spraks  * The minimum and maximum number of array elements in the scatter
261*1841Spraks  * gather list.
262*1841Spraks  */
263*1841Spraks #define	MINVMAPS   3		/* (MAXBSIZE/4096 + 1), min # of mappings */
264*1841Spraks #define	MAXVMAPS   10		/* max # of elements in the scatter gather list */
265*1841Spraks 
266*1841Spraks #ifdef _KERNEL
267*1841Spraks 
268*1841Spraks extern int	vpm_enable;	/* check this before using the VPM interfaces */
269*1841Spraks /*
270*1841Spraks  * vpm page mapping operations; usage is described at the top of this file.
271*1841Spraks  */
272*1841Spraks extern void	vpm_init(void);
273*1841Spraks extern int	vpm_map_pages(struct vnode *, u_offset_t, size_t, int,
274*1841Spraks 		vmap_t *, int, int  *, enum seg_rw);
275*1841Spraks 
276*1841Spraks extern void	vpm_unmap_pages(vmap_t *, enum seg_rw);
277*1841Spraks extern int	vpm_sync_pages(struct vnode *, u_offset_t, size_t, uint_t);
278*1841Spraks extern int	vpm_data_copy(struct vnode *, u_offset_t, size_t,
279*1841Spraks 		struct uio *, int, int *, int, enum seg_rw rw);
280*1841Spraks #endif	/* _KERNEL */
281*1841Spraks 
282*1841Spraks #ifdef	__cplusplus
283*1841Spraks }
284*1841Spraks #endif
285*1841Spraks 
286*1841Spraks #endif	/* _VM_VPM_H */
287