/*
 * xref: /onnv-gate/usr/src/cmd/sendmail/db/include/mp.h (revision 0:68f95e015346)
 */
/*-
 * See the file LICENSE for redistribution information.
 *
 * Copyright (c) 1996, 1997, 1998
 *	Sleepycat Software.  All rights reserved.
 *
 *	@(#)mp.h	10.37 (Sleepycat) 1/1/99
 */
9*0Sstevel@tonic-gate 
/*
 * Forward declarations and short type names for the shared-region and
 * buffer-header structures used throughout the mpool code.
 */
struct __bh;		typedef struct __bh BH;
struct __db_mpreg;	typedef struct __db_mpreg DB_MPREG;
struct __mpool;		typedef struct __mpool MPOOL;
struct __mpoolfile;	typedef struct __mpoolfile MPOOLFILE;

					/* Default mpool name. */
#define	DB_DEFAULT_MPOOL_FILE	"__db_mpool.share"

/*
 * We default to 256K (32 8K pages) if the user doesn't specify, and
 * require a minimum of 20K.
 *
 * DB_CACHESIZE_DEF is only defined here when it has not already been
 * defined, so a build may supply its own default ahead of this header.
 * DB_CACHESIZE_MIN is defined unconditionally.
 */
#ifndef	DB_CACHESIZE_DEF
#define	DB_CACHESIZE_DEF	(256 * 1024)
#endif
#define	DB_CACHESIZE_MIN	( 20 * 1024)

#define	INVALID		0		/* Invalid shared memory offset. */
28*0Sstevel@tonic-gate 
/*
 * There are three ways we do locking in the mpool code:
 *
 * Locking a handle mutex to provide concurrency for DB_THREAD operations.
 * Locking the region mutex to provide mutual exclusion while reading and
 *    writing structures in the shared region.
 * Locking buffer header mutexes during I/O.
 *
 * The first will not be further described here.  We use the shared mpool
 * region lock to provide mutual exclusion while reading/modifying all of
 * the data structures, including the buffer headers.  We use a per-buffer
 * header lock to wait on buffer I/O.  The order of locking is as follows:
 *
 * Searching for a buffer:
 *	Acquire the region lock.
 *	Find the buffer header.
 *	Increment the reference count (guarantee the buffer stays).
 *	While the BH_LOCKED flag is set (I/O is going on) {
 *	    Release the region lock.
 *		Explicitly yield the processor if it's not the first pass
 *		through this loop, otherwise, we can simply spin because
 *		we'll simply be switching between the two locks.
 *	    Request the buffer lock.
 *	    The I/O will complete...
 *	    Acquire the buffer lock.
 *	    Release the buffer lock.
 *	    Acquire the region lock.
 *	}
 *	Return the buffer.
 *
 * Reading/writing a buffer:
 *	Acquire the region lock.
 *	Find/create the buffer header.
 *	If reading, increment the reference count (guarantee the buffer stays).
 *	Set the BH_LOCKED flag.
 *	Acquire the buffer lock (guaranteed not to block).
 *	Release the region lock.
 *	Do the I/O and/or initialize the buffer contents.
 *	Release the buffer lock.
 *	    At this point, the buffer lock is available, but the logical
 *	    operation (flagged by BH_LOCKED) is not yet completed.  For
 *	    this reason, among others, threads checking the BH_LOCKED flag
 *	    must loop around their test.
 *	Acquire the region lock.
 *	Clear the BH_LOCKED flag.
 *	Release the region lock.
 *	Return/discard the buffer.
 *
 * Pointers to DB_MPOOL, MPOOL, DB_MPOOLFILE and MPOOLFILE structures are not
 * reacquired when a region lock is reacquired because they couldn't have been
 * closed/discarded and because they never move in memory.
 */
/*
 * Lock helper macros.
 *
 * Each macro tests a flag on the DB_MPOOL handle (dbmp) with F_ISSET and
 * only then makes the mutex call; when the flag is clear the macro does
 * nothing.  The return value of the mutex call is deliberately discarded.
 *
 * Every body is wrapped in do { } while (0) so that a use such as
 *
 *	if (cond)
 *		LOCKREGION(dbmp);
 *	else
 *		...
 *
 * is a single statement and the "else" binds to the caller's "if" rather
 * than to the flag test hidden inside the macro.  Callers must supply the
 * trailing semicolon.
 *
 * Note that dbmp may be evaluated more than once; do not pass an
 * expression with side effects.
 */

/* Initialize a mutex if either handle or region locking is enabled. */
#define	LOCKINIT(dbmp, mutexp) do {					\
	if (F_ISSET(dbmp, MP_LOCKHANDLE | MP_LOCKREGION))		\
		(void)__db_mutex_init(mutexp,				\
		    MUTEX_LOCK_OFFSET((dbmp)->reginfo.addr, mutexp));	\
} while (0)

/* Lock/unlock a per-handle mutex; active only with MP_LOCKHANDLE. */
#define	LOCKHANDLE(dbmp, mutexp) do {					\
	if (F_ISSET(dbmp, MP_LOCKHANDLE))				\
		(void)__db_mutex_lock(mutexp, (dbmp)->reginfo.fd);	\
} while (0)
#define	UNLOCKHANDLE(dbmp, mutexp) do {					\
	if (F_ISSET(dbmp, MP_LOCKHANDLE))				\
		(void)__db_mutex_unlock(mutexp, (dbmp)->reginfo.fd);	\
} while (0)

/*
 * Lock/unlock the shared region; active only with MP_LOCKREGION.  The
 * region mutex is reached by viewing dbmp->mp through an RLAYOUT pointer.
 */
#define	LOCKREGION(dbmp) do {						\
	if (F_ISSET(dbmp, MP_LOCKREGION))				\
		(void)__db_mutex_lock(&((RLAYOUT *)(dbmp)->mp)->lock,	\
		    (dbmp)->reginfo.fd);				\
} while (0)
#define	UNLOCKREGION(dbmp) do {						\
	if (F_ISSET(dbmp, MP_LOCKREGION))				\
		(void)__db_mutex_unlock(&((RLAYOUT *)(dbmp)->mp)->lock,	\
		    (dbmp)->reginfo.fd);				\
} while (0)

/*
 * Lock/unlock a buffer header's mutex.  These are gated on MP_LOCKREGION,
 * the same flag that gates the region lock.
 */
#define	LOCKBUFFER(dbmp, bhp) do {					\
	if (F_ISSET(dbmp, MP_LOCKREGION))				\
		(void)__db_mutex_lock(&(bhp)->mutex, (dbmp)->reginfo.fd); \
} while (0)
#define	UNLOCKBUFFER(dbmp, bhp) do {					\
	if (F_ISSET(dbmp, MP_LOCKREGION))				\
		(void)__db_mutex_unlock(&(bhp)->mutex, (dbmp)->reginfo.fd); \
} while (0)
108*0Sstevel@tonic-gate 
/*
 * Check for region catastrophic shutdown.
 *
 * If the panic field of the shared region's layout is non-zero, this macro
 * executes "return (DB_RUNRECOVERY)" in the CALLING function, so it may
 * only be used inside functions whose return type can carry that value.
 *
 * The body is wrapped in do { } while (0) so that "MP_PANIC_CHECK(dbmp);"
 * is exactly one statement and can be used as the body of an if/else
 * without braces.  Callers must supply the trailing semicolon.
 */
#define	MP_PANIC_CHECK(dbmp) do {					\
	if ((dbmp)->mp->rlayout.panic)					\
		return (DB_RUNRECOVERY);				\
} while (0)
114*0Sstevel@tonic-gate 
115*0Sstevel@tonic-gate /*
116*0Sstevel@tonic-gate  * DB_MPOOL --
117*0Sstevel@tonic-gate  *	Per-process memory pool structure.
118*0Sstevel@tonic-gate  */
119*0Sstevel@tonic-gate struct __db_mpool {
120*0Sstevel@tonic-gate /* These fields need to be protected for multi-threaded support. */
121*0Sstevel@tonic-gate 	db_mutex_t	*mutexp;	/* Structure lock. */
122*0Sstevel@tonic-gate 
123*0Sstevel@tonic-gate 					/* List of pgin/pgout routines. */
124*0Sstevel@tonic-gate 	LIST_HEAD(__db_mpregh, __db_mpreg) dbregq;
125*0Sstevel@tonic-gate 
126*0Sstevel@tonic-gate 					/* List of DB_MPOOLFILE's. */
127*0Sstevel@tonic-gate 	TAILQ_HEAD(__db_mpoolfileh, __db_mpoolfile) dbmfq;
128*0Sstevel@tonic-gate 
129*0Sstevel@tonic-gate /* These fields are not protected. */
130*0Sstevel@tonic-gate 	DB_ENV     *dbenv;		/* Reference to error information. */
131*0Sstevel@tonic-gate 	REGINFO	    reginfo;		/* Region information. */
132*0Sstevel@tonic-gate 
133*0Sstevel@tonic-gate 	MPOOL	   *mp;			/* Address of the shared MPOOL. */
134*0Sstevel@tonic-gate 
135*0Sstevel@tonic-gate 	void	   *addr;		/* Address of shalloc() region. */
136*0Sstevel@tonic-gate 
137*0Sstevel@tonic-gate 	DB_HASHTAB *htab;		/* Hash table of bucket headers. */
138*0Sstevel@tonic-gate 
139*0Sstevel@tonic-gate #define	MP_LOCKHANDLE	0x01		/* Threaded, lock handles and region. */
140*0Sstevel@tonic-gate #define	MP_LOCKREGION	0x02		/* Concurrent access, lock region. */
141*0Sstevel@tonic-gate 	u_int32_t  flags;
142*0Sstevel@tonic-gate };
143*0Sstevel@tonic-gate 
144*0Sstevel@tonic-gate /*
145*0Sstevel@tonic-gate  * DB_MPREG --
146*0Sstevel@tonic-gate  *	DB_MPOOL registry of pgin/pgout functions.
147*0Sstevel@tonic-gate  */
148*0Sstevel@tonic-gate struct __db_mpreg {
149*0Sstevel@tonic-gate 	LIST_ENTRY(__db_mpreg) q;	/* Linked list. */
150*0Sstevel@tonic-gate 
151*0Sstevel@tonic-gate 	int ftype;			/* File type. */
152*0Sstevel@tonic-gate 					/* Pgin, pgout routines. */
153*0Sstevel@tonic-gate 	int (DB_CALLBACK *pgin) __P((db_pgno_t, void *, DBT *));
154*0Sstevel@tonic-gate 	int (DB_CALLBACK *pgout) __P((db_pgno_t, void *, DBT *));
155*0Sstevel@tonic-gate };
156*0Sstevel@tonic-gate 
157*0Sstevel@tonic-gate /*
158*0Sstevel@tonic-gate  * DB_MPOOLFILE --
159*0Sstevel@tonic-gate  *	Per-process DB_MPOOLFILE information.
160*0Sstevel@tonic-gate  */
161*0Sstevel@tonic-gate struct __db_mpoolfile {
162*0Sstevel@tonic-gate /* These fields need to be protected for multi-threaded support. */
163*0Sstevel@tonic-gate 	db_mutex_t	*mutexp;	/* Structure lock. */
164*0Sstevel@tonic-gate 
165*0Sstevel@tonic-gate 	int	   fd;			/* Underlying file descriptor. */
166*0Sstevel@tonic-gate 
167*0Sstevel@tonic-gate 	u_int32_t ref;			/* Reference count. */
168*0Sstevel@tonic-gate 
169*0Sstevel@tonic-gate 	/*
170*0Sstevel@tonic-gate 	 * !!!
171*0Sstevel@tonic-gate 	 * This field is a special case -- it's protected by the region lock
172*0Sstevel@tonic-gate 	 * NOT the thread lock.  The reason for this is that we always have
173*0Sstevel@tonic-gate 	 * the region lock immediately before or after we modify the field,
174*0Sstevel@tonic-gate 	 * and we don't want to use the structure lock to protect it because
175*0Sstevel@tonic-gate 	 * then I/O (which is done with the structure lock held because of
176*0Sstevel@tonic-gate 	 * the race between the seek and write of the file descriptor) will
177*0Sstevel@tonic-gate 	 * block any other put/get calls using this DB_MPOOLFILE structure.
178*0Sstevel@tonic-gate 	 */
179*0Sstevel@tonic-gate 	u_int32_t pinref;		/* Pinned block reference count. */
180*0Sstevel@tonic-gate 
181*0Sstevel@tonic-gate /* These fields are not protected. */
182*0Sstevel@tonic-gate 	TAILQ_ENTRY(__db_mpoolfile) q;	/* Linked list of DB_MPOOLFILE's. */
183*0Sstevel@tonic-gate 
184*0Sstevel@tonic-gate 	DB_MPOOL  *dbmp;		/* Overlying DB_MPOOL. */
185*0Sstevel@tonic-gate 	MPOOLFILE *mfp;			/* Underlying MPOOLFILE. */
186*0Sstevel@tonic-gate 
187*0Sstevel@tonic-gate 	void	  *addr;		/* Address of mmap'd region. */
188*0Sstevel@tonic-gate 	size_t	   len;			/* Length of mmap'd region. */
189*0Sstevel@tonic-gate 
190*0Sstevel@tonic-gate /* These fields need to be protected for multi-threaded support. */
191*0Sstevel@tonic-gate #define	MP_READONLY	0x01		/* File is readonly. */
192*0Sstevel@tonic-gate #define	MP_UPGRADE	0x02		/* File descriptor is readwrite. */
193*0Sstevel@tonic-gate #define	MP_UPGRADE_FAIL	0x04		/* Upgrade wasn't possible. */
194*0Sstevel@tonic-gate 	u_int32_t  flags;
195*0Sstevel@tonic-gate };
196*0Sstevel@tonic-gate 
197*0Sstevel@tonic-gate /*
198*0Sstevel@tonic-gate  * MPOOL --
199*0Sstevel@tonic-gate  *	Shared memory pool region.  One of these is allocated in shared
200*0Sstevel@tonic-gate  *	memory, and describes the pool.
201*0Sstevel@tonic-gate  */
202*0Sstevel@tonic-gate struct __mpool {
203*0Sstevel@tonic-gate 	RLAYOUT	    rlayout;		/* General region information. */
204*0Sstevel@tonic-gate 
205*0Sstevel@tonic-gate 	SH_TAILQ_HEAD(__bhq) bhq;	/* LRU list of buckets. */
206*0Sstevel@tonic-gate 	SH_TAILQ_HEAD(__bhfq) bhfq;	/* Free buckets. */
207*0Sstevel@tonic-gate 	SH_TAILQ_HEAD(__mpfq) mpfq;	/* List of MPOOLFILEs. */
208*0Sstevel@tonic-gate 
209*0Sstevel@tonic-gate 	/*
210*0Sstevel@tonic-gate 	 * We make the assumption that the early pages of the file are far
211*0Sstevel@tonic-gate 	 * more likely to be retrieved than the later pages, which means
212*0Sstevel@tonic-gate 	 * that the top bits are more interesting for hashing since they're
213*0Sstevel@tonic-gate 	 * less likely to collide.  On the other hand, since 512 4K pages
214*0Sstevel@tonic-gate 	 * represents a 2MB file, only the bottom 9 bits of the page number
215*0Sstevel@tonic-gate 	 * are likely to be set.  We XOR in the offset in the MPOOL of the
216*0Sstevel@tonic-gate 	 * MPOOLFILE that backs this particular page, since that should also
217*0Sstevel@tonic-gate 	 * be unique for the page.
218*0Sstevel@tonic-gate 	 */
219*0Sstevel@tonic-gate #define	BUCKET(mp, mf_offset, pgno)					\
220*0Sstevel@tonic-gate 	(((pgno) ^ ((mf_offset) << 9)) % (mp)->htab_buckets)
221*0Sstevel@tonic-gate 
222*0Sstevel@tonic-gate 	size_t	    htab;		/* Hash table offset. */
223*0Sstevel@tonic-gate 	size_t	    htab_buckets;	/* Number of hash table entries. */
224*0Sstevel@tonic-gate 
225*0Sstevel@tonic-gate 	DB_LSN	    lsn;		/* Maximum checkpoint LSN. */
226*0Sstevel@tonic-gate 	u_int32_t   lsn_cnt;		/* Checkpoint buffers left to write. */
227*0Sstevel@tonic-gate 
228*0Sstevel@tonic-gate 	DB_MPOOL_STAT stat;		/* Global mpool statistics. */
229*0Sstevel@tonic-gate 
230*0Sstevel@tonic-gate #define	MP_LSN_RETRY	0x01		/* Retry all BH_WRITE buffers. */
231*0Sstevel@tonic-gate 	u_int32_t  flags;
232*0Sstevel@tonic-gate };
233*0Sstevel@tonic-gate 
234*0Sstevel@tonic-gate /*
235*0Sstevel@tonic-gate  * MPOOLFILE --
236*0Sstevel@tonic-gate  *	Shared DB_MPOOLFILE information.
237*0Sstevel@tonic-gate  */
238*0Sstevel@tonic-gate struct __mpoolfile {
239*0Sstevel@tonic-gate 	SH_TAILQ_ENTRY  q;		/* List of MPOOLFILEs */
240*0Sstevel@tonic-gate 
241*0Sstevel@tonic-gate 	u_int32_t ref;			/* Reference count. */
242*0Sstevel@tonic-gate 
243*0Sstevel@tonic-gate 	int	  ftype;		/* File type. */
244*0Sstevel@tonic-gate 
245*0Sstevel@tonic-gate 	int32_t	  lsn_off;		/* Page's LSN offset. */
246*0Sstevel@tonic-gate 	u_int32_t clear_len;		/* Bytes to clear on page create. */
247*0Sstevel@tonic-gate 
248*0Sstevel@tonic-gate 	size_t	  path_off;		/* File name location. */
249*0Sstevel@tonic-gate 	size_t	  fileid_off;		/* File identification location. */
250*0Sstevel@tonic-gate 
251*0Sstevel@tonic-gate 	size_t	  pgcookie_len;		/* Pgin/pgout cookie length. */
252*0Sstevel@tonic-gate 	size_t	  pgcookie_off;		/* Pgin/pgout cookie location. */
253*0Sstevel@tonic-gate 
254*0Sstevel@tonic-gate 	u_int32_t lsn_cnt;		/* Checkpoint buffers left to write. */
255*0Sstevel@tonic-gate 
256*0Sstevel@tonic-gate 	db_pgno_t last_pgno;		/* Last page in the file. */
257*0Sstevel@tonic-gate 	db_pgno_t orig_last_pgno;	/* Original last page in the file. */
258*0Sstevel@tonic-gate 
259*0Sstevel@tonic-gate #define	MP_CAN_MMAP	0x01		/* If the file can be mmap'd. */
260*0Sstevel@tonic-gate #define	MP_TEMP		0x02		/* Backing file is a temporary. */
261*0Sstevel@tonic-gate 	u_int32_t  flags;
262*0Sstevel@tonic-gate 
263*0Sstevel@tonic-gate 	DB_MPOOL_FSTAT stat;		/* Per-file mpool statistics. */
264*0Sstevel@tonic-gate };
265*0Sstevel@tonic-gate 
266*0Sstevel@tonic-gate /*
267*0Sstevel@tonic-gate  * BH --
268*0Sstevel@tonic-gate  *	Buffer header.
269*0Sstevel@tonic-gate  */
270*0Sstevel@tonic-gate struct __bh {
271*0Sstevel@tonic-gate 	db_mutex_t	mutex;		/* Structure lock. */
272*0Sstevel@tonic-gate 
273*0Sstevel@tonic-gate 	u_int16_t	ref;		/* Reference count. */
274*0Sstevel@tonic-gate 
275*0Sstevel@tonic-gate #define	BH_CALLPGIN	0x001		/* Page needs to be reworked... */
276*0Sstevel@tonic-gate #define	BH_DIRTY	0x002		/* Page was modified. */
277*0Sstevel@tonic-gate #define	BH_DISCARD	0x004		/* Page is useless. */
278*0Sstevel@tonic-gate #define	BH_LOCKED	0x008		/* Page is locked (I/O in progress). */
279*0Sstevel@tonic-gate #define	BH_TRASH	0x010		/* Page is garbage. */
280*0Sstevel@tonic-gate #define	BH_WRITE	0x020		/* Page scheduled for writing. */
281*0Sstevel@tonic-gate 	u_int16_t  flags;
282*0Sstevel@tonic-gate 
283*0Sstevel@tonic-gate 	SH_TAILQ_ENTRY	q;		/* LRU queue. */
284*0Sstevel@tonic-gate 	SH_TAILQ_ENTRY	hq;		/* MPOOL hash bucket queue. */
285*0Sstevel@tonic-gate 
286*0Sstevel@tonic-gate 	db_pgno_t pgno;			/* Underlying MPOOLFILE page number. */
287*0Sstevel@tonic-gate 	size_t	  mf_offset;		/* Associated MPOOLFILE offset. */
288*0Sstevel@tonic-gate 
289*0Sstevel@tonic-gate 	/*
290*0Sstevel@tonic-gate 	 * !!!
291*0Sstevel@tonic-gate 	 * This array must be size_t aligned -- the DB access methods put PAGE
292*0Sstevel@tonic-gate 	 * and other structures into it, and expect to be able to access them
293*0Sstevel@tonic-gate 	 * directly.  (We guarantee size_t alignment in the db_mpool(3) manual
294*0Sstevel@tonic-gate 	 * page as well.)
295*0Sstevel@tonic-gate 	 */
296*0Sstevel@tonic-gate 	u_int8_t   buf[1];		/* Variable length data. */
297*0Sstevel@tonic-gate };
298*0Sstevel@tonic-gate 
299*0Sstevel@tonic-gate #include "mp_ext.h"
300