/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

/*
 * University Copyright- Copyright (c) 1982, 1986, 1988
 * The Regents of the University of California
 * All Rights Reserved
 *
 * University Acknowledgment- Portions of this document are derived from
 * software developed by the University of California, Berkeley, and its
 * contributors.
 */

#ifndef	_VM_SEG_MAP_H
#define	_VM_SEG_MAP_H

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#ifdef	__cplusplus
extern "C" {
#endif

/*
 * When segmap is created it is possible to program its behavior,
 *	using the create args [needed for performance reasons].
 * Segmap creates n lists of pages.
 *	For VAC machines, there will be at least one free list
 *	per color. If more than one free list per color is needed,
 *	set nfreelist as needed.
 *
 *	For PAC machines, it will be treated as VAC with only one
 *	color - every page is of the same color. Again, set nfreelist
 *	to get more than one free list.
 */
struct segmap_crargs {
	uint_t	prot;
	uint_t	shmsize;	/* shm_alignment for VAC, 0 for PAC. */
	uint_t	nfreelist;	/* number of freelists per color, >= 1 */
};
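
/*
 * Illustrative sketch (not part of this header): how a caller might fill
 * in segmap_crargs before handing it to segmap_create().  The concrete
 * values and the error handling are assumptions, not the kernel's actual
 * boot-time code.
 *
 *	struct segmap_crargs a;
 *
 *	a.prot = PROT_READ | PROT_WRITE;
 *	a.shmsize = 0;		// 0 on PAC; shm_alignment on VAC machines
 *	a.nfreelist = 1;	// one free list per color
 *	if (segmap_create(segkmap, (void *)&a) != 0)
 *		panic("segmap_create failed");
 */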

#include <vm/kpm.h>

/*
 * Each smap struct represents a MAXBSIZE sized mapping to the
 * <sm_vp, sm_off> given in the structure.  The location of the
 * structure in the array gives the virtual address of the
 * mapping. Structure rearranged for 64-bit sm_off.
 */
struct	smap {
	kmutex_t	sm_mtx;		/* protect non-list fields */
	struct	vnode	*sm_vp;		/* vnode pointer (if mapped) */
	struct	smap	*sm_hash;	/* hash pointer */
	struct	smap	*sm_next;	/* next pointer */
	struct	smap	*sm_prev;	/* previous pointer */
	u_offset_t	sm_off;		/* file offset for mapping */
	ushort_t	sm_bitmap;	/* bit map for locked translations */
	ushort_t	sm_refcnt;	/* reference count for uses */
	ushort_t	sm_flags;	/* smap flags */
	ushort_t	sm_free_ndx;	/* freelist index */
#ifdef	SEGKPM_SUPPORT
	struct kpme	sm_kpme;	/* segkpm */
#endif
};
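
/*
 * Illustrative sketch (assumed names, not definitions from this header):
 * since slot i of the smap array covers the MAXBSIZE window starting at
 * seg->s_base + (i << MAXBSHIFT), translating between a segkmap address
 * and its smap is plain pointer arithmetic, roughly:
 *
 *	struct segmap_data *smd = (struct segmap_data *)seg->s_data;
 *	struct smap *smp = &smd->smd_sm[(addr - seg->s_base) >> MAXBSHIFT];
 *	caddr_t base = seg->s_base + ((smp - smd->smd_sm) << MAXBSHIFT);
 *
 * The real helper macros live in seg_map.c.
 */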

#ifdef	SEGKPM_SUPPORT
#define	GET_KPME(smp)	(&(smp)->sm_kpme)
#define	sm_kpme_next	sm_kpme.kpe_next
#define	sm_kpme_prev	sm_kpme.kpe_prev
#define	sm_kpme_page	sm_kpme.kpe_page
#else
#define	GET_KPME(smp)	((struct kpme *)NULL)
#endif

/* sm_flags */
#define	SM_KPM_NEWPAGE	   0x00000001	/* page created in segmap_getmapflt */
#define	SM_NOTKPM_RELEASED 0x00000002	/* released smap not in segkpm mode */
#define	SM_QNDX_ZERO	   0x00000004	/* on the index 0 freelist */
#define	SM_READ_DATA	   0x00000010	/* page created for read */
#define	SM_WRITE_DATA	   0x00000020	/* page created for write */

/*
 * Multiple smap free lists are maintained so that allocations
 * will scale with cpu count. Each free list is made up of 2 queues
 * so that allocations and deallocations can proceed concurrently.
 * Each queue structure is padded to 64 bytes to avoid false sharing.
 */
#define	SM_FREEQ_PAD (64 - sizeof (struct smap *) - sizeof (kmutex_t))
struct	sm_freeq {
	struct smap	*smq_free;	/* points into freelist */
	kmutex_t	smq_mtx;	/* protects smq_free */
	char		smq_pad[SM_FREEQ_PAD];
};

struct	smfree {
	struct sm_freeq	sm_freeq[2];	/* alloc and release queues */
	struct sm_freeq	*sm_allocq;	/* current allocq */
	struct sm_freeq	*sm_releq;	/* current releq */
	kcondvar_t	sm_free_cv;
	ushort_t	sm_want;	/* someone wants a slot of this color */
};
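
/*
 * Sketch of the two-queue scheme (an illustration only, not the code in
 * seg_map.c, and it glosses over the lock-ordering details): slots are
 * allocated from *sm_allocq and released onto *sm_releq, each queue under
 * its own smq_mtx, so the two paths rarely contend.  When the alloc queue
 * runs dry, the queue pointers are swapped and allocation continues from
 * what used to be the release queue:
 *
 *	struct smfree *sf = ...;			// one free list (color)
 *
 *	if (sf->sm_allocq->smq_free == NULL) {
 *		struct sm_freeq *tmp = sf->sm_allocq;	// now empty
 *
 *		sf->sm_allocq = sf->sm_releq;	// allocate from the other queue
 *		sf->sm_releq = tmp;		// releases refill the empty one
 *	}
 */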

/*
 * Cached smaps are kept on hash chains to enable fast reclaim lookups.
 */
struct	smaphash {
	kmutex_t	sh_mtx;		/* protects this hash chain */
	struct	smap	*sh_hash_list;	/* start of hash chain */
};

/*
 * (Semi) private data maintained by the segmap driver per SEGMENT mapping.
 * All fields in segmap_data are read-only after the segment is created.
 */

struct	segmap_data {
	struct	smap	*smd_sm;	/* array of smap structures */
	long		smd_npages;	/* size of smap array */
	struct smfree	*smd_free;	/* ptr to freelist header array */
	struct smaphash	*smd_hash;	/* ptr to hash header array */
	int		smd_nfree;	/* number of free lists */
	uchar_t		smd_prot;	/* protections for all smaps */
};

/*
 * Statistics for segmap operations.
 *
 * No explicit locking to protect these stats.
 */
struct segmapcnt {
	kstat_named_t	smp_fault;	/* number of segmap_faults */
	kstat_named_t	smp_faulta;	/* number of segmap_faultas */
	kstat_named_t	smp_getmap;	/* number of segmap_getmaps */
	kstat_named_t	smp_get_use;	/* getmaps that reuse existing map */
	kstat_named_t	smp_get_reclaim; /* getmaps that do a reclaim */
	kstat_named_t	smp_get_reuse;	/* getmaps that reuse a slot */
	kstat_named_t	smp_get_unused;	/* getmaps that reuse existing map */
	kstat_named_t	smp_get_nofree;	/* getmaps with no free slots */
	kstat_named_t	smp_rel_async;	/* releases that are async */
	kstat_named_t	smp_rel_write;	/* releases that write */
	kstat_named_t	smp_rel_free;	/* releases that free */
	kstat_named_t	smp_rel_abort;	/* releases that abort */
	kstat_named_t	smp_rel_dontneed; /* releases with dontneed set */
	kstat_named_t	smp_release;	/* releases with no other action */
	kstat_named_t	smp_pagecreate;	/* pagecreates */
	kstat_named_t	smp_free_notfree; /* pages not freed in */
					/* segmap_pagefree */
	kstat_named_t	smp_free_dirty;	/* dirty pages freed */
					/* in segmap_pagefree */
	kstat_named_t	smp_free;	/* clean pages freed in */
					/* segmap_pagefree */
	kstat_named_t	smp_stolen;	/* segmap_getmapflt() stole */
					/* from get_free_smp() */
	kstat_named_t	smp_get_nomtx;	/* free smaps but no mutex */
};

/*
 * These are flags used on release.  Some of these might get handled
 * by segment operations needed for msync (when we figure them out).
 * SM_ASYNC modifies SM_WRITE.  SM_DONTNEED modifies SM_FREE.  SM_FREE
 * and SM_INVAL as well as SM_FREE and SM_DESTROY are mutually exclusive.
 * SM_DESTROY behaves like SM_INVAL but also forces the pages to be
 * destroyed -- this prevents them from being written to the backing
 * store.
 */
#define	SM_WRITE	0x01		/* write back the pages upon release */
#define	SM_ASYNC	0x02		/* do the write asynchronously */
#define	SM_FREE		0x04		/* put pages back on free list */
#define	SM_INVAL	0x08		/* invalidate page (no caching) */
#define	SM_DONTNEED	0x10		/* less likely to be needed soon */
#define	SM_DESTROY	0x20		/* invalidate page, don't write back */
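
/*
 * Illustrative flag combinations (assumptions about typical callers, not
 * rules stated in this header): a consumer that has just modified a
 * window usually writes it back asynchronously, a sequential reader can
 * hint that it will not touch the window again, and a failed write can
 * be released with SM_DESTROY so the partially filled pages are not
 * pushed to the backing store:
 *
 *	error = segmap_release(segkmap, base, SM_WRITE | SM_ASYNC);
 *	(void) segmap_release(segkmap, base, SM_DONTNEED);
 *	(void) segmap_release(segkmap, base, SM_DESTROY);
 */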

/*
 * These are the forcefault flags used on getmapflt.
 *
 * The original semantics were extended to allow using the segkpm mapping
 * scheme w/o a major segmap interface change for MAXBSIZE == PAGESIZE
 * (which is required to enable segkpm for MAXBSIZE > PAGESIZE).
 * Most segmap consumers need not be changed at all, or only slightly,
 * to take advantage of segkpm. Because the segkpm virtual address is
 * based on the physical address of a page, a page is required to
 * determine the virtual address (return value). Pages mapped with
 * segkpm are always at least read locked and are hence protected from
 * pageout or fsflush from segmap_getmap until segmap_release. This
 * implies that the segkpm mappings are locked during this period too.
 * No trap-driven segmap_fault() calls are possible in segkpm mode.
 *
 * The following combinations of "forcefault" and "rw" allow segkpm mode.
 * (1) SM_FAULT, S_READ
 * (2) SM_FAULT, S_WRITE
 * (3) SM_PAGECREATE, S_WRITE
 * (4) SM_LOCKPROTO, {S_READ, S_WRITE, S_OTHER}
 *
 * The regular additional operations (they come in pairs in most cases):
 * . segmap_pagecreate/segmap_pageunlock
 * . segmap_fault(F_SOFTLOCK)/segmap_fault(F_SOFTUNLOCK)
 *
 * are mostly a no-op in segkpm mode with the following exceptions:
 * . The "newpage" return value of segmap_pagecreate is still supported
 *   for zeroout operations needed on newly created pages.
 *
 * . segmap_fault() must follow when an error could be expected in
 *   the VOP_GETPAGE. In segkpm mode this error is recognized in
 *   segmap_getmapflt and returned from the following segmap_fault()
 *   call. The "hole" optimization (read only after first VOP_GETPAGE
 *   mapping in segmap_getmapflt followed by a trap driven protection
 *   fault and a second VOP_GETPAGE via segmap_fault) cannot be used.
 *
 * . segmap_fault(F_SOFTUNLOCK) must follow when segmap_getmapflt was
 *   called w/ (SM_LOCKPROTO, S_OTHER). S_WRITE has to be applied when
 *   the page should be marked "dirty". Otherwise the page is not
 *   written to the backing store later (as mentioned above, no page
 *   or protection faults are possible in segkpm mode). The caller cannot
 *   use only S_OTHER and rely on a protection fault to force the page
 *   to become dirty.
 *
 * . The segmap_pagecreate parameter softlock is ignored; pages and
 *   mappings are locked anyway.
 *
 * SM_LOCKPROTO is used in the fbio layer and some special segmap consumers.
 */
#define	SM_PAGECREATE	0x00		/* create page in segkpm mode, no I/O */
#define	SM_FAULT	0x01		/* fault in page if necessary */
#define	SM_LOCKPROTO	0x02		/* lock/unlock protocol used */

#define	MAXBSHIFT	13		/* log2(MAXBSIZE) */

#define	MAXBOFFSET	(MAXBSIZE - 1)
#define	MAXBMASK	(~MAXBOFFSET)
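
/*
 * Illustrative read path (a sketch of a typical consumer using only the
 * interfaces declared in this header; the variable names and the
 * uiomove() copy are assumptions).  The file offset is split into a
 * MAXBSIZE-aligned window offset and an offset within the window:
 *
 *	u_offset_t blkoff = uio->uio_loffset & (offset_t)MAXBMASK;
 *	uint_t mapon = (uint_t)(uio->uio_loffset & MAXBOFFSET);
 *	size_t n = MIN(MAXBSIZE - mapon, uio->uio_resid);
 *	caddr_t base;
 *	int error;
 *
 *	base = segmap_getmapflt(segkmap, vp, blkoff, MAXBSIZE,
 *	    SM_FAULT, S_READ);
 *	error = uiomove(base + mapon, n, UIO_READ, uio);
 *	error = segmap_release(segkmap, base, error ? 0 : SM_DONTNEED);
 *
 * SM_DONTNEED here only hints that the window is unlikely to be reused
 * soon (e.g. sequential reads); plain 0 is the conservative choice.
 */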

/*
 * SMAP_HASHAVELEN is the average length desired for this chain, from
 * which the size of the smd_hash table is derived at segment create time.
 * SMAP_HASHVPSHIFT is defined so that 1 << SMAP_HASHVPSHIFT is the
 * approximate size of a vnode struct.
 */
#define	SMAP_HASHAVELEN		4
#define	SMAP_HASHVPSHIFT	6
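
/*
 * Sketch only (assumptions; the actual sizing and hash function live in
 * seg_map.c and differ in detail): the table ends up with roughly
 * smd_npages / SMAP_HASHAVELEN buckets, and a <vp, off> pair is hashed
 * along the lines of
 *
 *	hash = (((uintptr_t)vp >> SMAP_HASHVPSHIFT) +
 *	    (off >> MAXBSHIFT)) & hashmask;
 *
 * Shifting the vnode pointer by about log2(sizeof (vnode)) discards the
 * low bits that are the same for every vnode address.
 */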


#ifdef _KERNEL
/*
 * The kernel generic mapping segment.
 */
extern struct seg *segkmap;

/*
 * Public seg_map segment operations.
 */
extern int	segmap_create(struct seg *, void *);
extern int	segmap_pagecreate(struct seg *, caddr_t, size_t, int);
extern void	segmap_pageunlock(struct seg *, caddr_t, size_t, enum seg_rw);
extern faultcode_t segmap_fault(struct hat *, struct seg *, caddr_t, size_t,
		enum fault_type, enum seg_rw);
extern caddr_t	segmap_getmap(struct seg *, struct vnode *, u_offset_t);
extern caddr_t	segmap_getmapflt(struct seg *, struct vnode *, u_offset_t,
		size_t, int, enum seg_rw);
extern int	segmap_release(struct seg *, caddr_t, uint_t);
extern void	segmap_flush(struct seg *, struct vnode *);
extern void	segmap_inval(struct seg *, struct vnode *, u_offset_t);
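
/*
 * Illustrative write path (a sketch of a typical consumer; the pagecreate
 * decision, variable names, and uiomove() usage are assumptions, not part
 * of this header).  When an entire MAXBSIZE window will be overwritten,
 * the caller can avoid reading it in from the backing store by passing
 * SM_PAGECREATE and creating the pages explicitly:
 *
 *	int pagecreate = (mapon == 0 && n == MAXBSIZE);
 *	caddr_t base;
 *	int newpage = 0, error;
 *
 *	base = segmap_getmapflt(segkmap, vp, blkoff, MAXBSIZE,
 *	    pagecreate ? SM_PAGECREATE : SM_FAULT, S_WRITE);
 *	if (pagecreate)
 *		newpage = segmap_pagecreate(segkmap, base, MAXBSIZE, 0);
 *	error = uiomove(base + mapon, n, UIO_WRITE, uio);
 *	if (pagecreate)
 *		segmap_pageunlock(segkmap, base, MAXBSIZE, S_WRITE);
 *	error = segmap_release(segkmap, base,
 *	    error ? SM_DESTROY : (SM_WRITE | SM_ASYNC));
 *
 * The newpage return value tells the caller which pages were freshly
 * created and may need explicit zeroing when only part of the window is
 * actually written.
 */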

#endif	/* _KERNEL */

#ifdef	__cplusplus
}
#endif

#endif	/* _VM_SEG_MAP_H */