xref: /onnv-gate/usr/src/cmd/sendmail/db/mp/mp_region.c (revision 0:68f95e015346)
1*0Sstevel@tonic-gate /*-
2*0Sstevel@tonic-gate  * See the file LICENSE for redistribution information.
3*0Sstevel@tonic-gate  *
4*0Sstevel@tonic-gate  * Copyright (c) 1996, 1997, 1998
5*0Sstevel@tonic-gate  *	Sleepycat Software.  All rights reserved.
6*0Sstevel@tonic-gate  */
7*0Sstevel@tonic-gate #include "config.h"
8*0Sstevel@tonic-gate 
9*0Sstevel@tonic-gate #ifndef lint
10*0Sstevel@tonic-gate static const char sccsid[] = "@(#)mp_region.c	10.35 (Sleepycat) 12/11/98";
11*0Sstevel@tonic-gate #endif /* not lint */
12*0Sstevel@tonic-gate 
13*0Sstevel@tonic-gate #ifndef NO_SYSTEM_INCLUDES
14*0Sstevel@tonic-gate #include <sys/types.h>
15*0Sstevel@tonic-gate 
16*0Sstevel@tonic-gate #include <errno.h>
17*0Sstevel@tonic-gate #include <string.h>
18*0Sstevel@tonic-gate #endif
19*0Sstevel@tonic-gate 
20*0Sstevel@tonic-gate #include "db_int.h"
21*0Sstevel@tonic-gate #include "shqueue.h"
22*0Sstevel@tonic-gate #include "db_shash.h"
23*0Sstevel@tonic-gate #include "mp.h"
24*0Sstevel@tonic-gate #include "common_ext.h"
25*0Sstevel@tonic-gate 
26*0Sstevel@tonic-gate /*
27*0Sstevel@tonic-gate  * __memp_reg_alloc --
28*0Sstevel@tonic-gate  *	Allocate some space in the mpool region, with locking.
29*0Sstevel@tonic-gate  *
30*0Sstevel@tonic-gate  * PUBLIC: int __memp_reg_alloc __P((DB_MPOOL *, size_t, size_t *, void *));
31*0Sstevel@tonic-gate  */
32*0Sstevel@tonic-gate int
__memp_reg_alloc(dbmp,len,offsetp,retp)33*0Sstevel@tonic-gate __memp_reg_alloc(dbmp, len, offsetp, retp)
34*0Sstevel@tonic-gate 	DB_MPOOL *dbmp;
35*0Sstevel@tonic-gate 	size_t len, *offsetp;
36*0Sstevel@tonic-gate 	void *retp;
37*0Sstevel@tonic-gate {
38*0Sstevel@tonic-gate 	int ret;
39*0Sstevel@tonic-gate 
40*0Sstevel@tonic-gate 	LOCKREGION(dbmp);
41*0Sstevel@tonic-gate 	ret = __memp_alloc(dbmp, len, offsetp, retp);
42*0Sstevel@tonic-gate 	UNLOCKREGION(dbmp);
43*0Sstevel@tonic-gate 	return (ret);
44*0Sstevel@tonic-gate }
45*0Sstevel@tonic-gate 
46*0Sstevel@tonic-gate /*
47*0Sstevel@tonic-gate  * __memp_alloc --
48*0Sstevel@tonic-gate  *	Allocate some space in the mpool region.
49*0Sstevel@tonic-gate  *
50*0Sstevel@tonic-gate  * PUBLIC: int __memp_alloc __P((DB_MPOOL *, size_t, size_t *, void *));
51*0Sstevel@tonic-gate  */
52*0Sstevel@tonic-gate int
__memp_alloc(dbmp,len,offsetp,retp)53*0Sstevel@tonic-gate __memp_alloc(dbmp, len, offsetp, retp)
54*0Sstevel@tonic-gate 	DB_MPOOL *dbmp;
55*0Sstevel@tonic-gate 	size_t len, *offsetp;
56*0Sstevel@tonic-gate 	void *retp;
57*0Sstevel@tonic-gate {
58*0Sstevel@tonic-gate 	BH *bhp, *nbhp;
59*0Sstevel@tonic-gate 	MPOOL *mp;
60*0Sstevel@tonic-gate 	MPOOLFILE *mfp;
61*0Sstevel@tonic-gate 	size_t fsize, total;
62*0Sstevel@tonic-gate 	int nomore, restart, ret, wrote;
63*0Sstevel@tonic-gate 	void *p;
64*0Sstevel@tonic-gate 
65*0Sstevel@tonic-gate 	mp = dbmp->mp;
66*0Sstevel@tonic-gate 
67*0Sstevel@tonic-gate 	nomore = 0;
68*0Sstevel@tonic-gate alloc:	if ((ret = __db_shalloc(dbmp->addr, len, MUTEX_ALIGNMENT, &p)) == 0) {
69*0Sstevel@tonic-gate 		if (offsetp != NULL)
70*0Sstevel@tonic-gate 			*offsetp = R_OFFSET(dbmp, p);
71*0Sstevel@tonic-gate 		*(void **)retp = p;
72*0Sstevel@tonic-gate 		return (0);
73*0Sstevel@tonic-gate 	}
74*0Sstevel@tonic-gate 	if (nomore) {
75*0Sstevel@tonic-gate 		__db_err(dbmp->dbenv,
76*0Sstevel@tonic-gate 	    "Unable to allocate %lu bytes from mpool shared region: %s\n",
77*0Sstevel@tonic-gate 		    (u_long)len, strerror(ret));
78*0Sstevel@tonic-gate 		return (ret);
79*0Sstevel@tonic-gate 	}
80*0Sstevel@tonic-gate 
81*0Sstevel@tonic-gate 	/* Look for a buffer on the free list that's the right size. */
82*0Sstevel@tonic-gate 	for (bhp =
83*0Sstevel@tonic-gate 	    SH_TAILQ_FIRST(&mp->bhfq, __bh); bhp != NULL; bhp = nbhp) {
84*0Sstevel@tonic-gate 		nbhp = SH_TAILQ_NEXT(bhp, q, __bh);
85*0Sstevel@tonic-gate 
86*0Sstevel@tonic-gate 		if (__db_shsizeof(bhp) == len) {
87*0Sstevel@tonic-gate 			SH_TAILQ_REMOVE(&mp->bhfq, bhp, q, __bh);
88*0Sstevel@tonic-gate 			if (offsetp != NULL)
89*0Sstevel@tonic-gate 				*offsetp = R_OFFSET(dbmp, bhp);
90*0Sstevel@tonic-gate 			*(void **)retp = bhp;
91*0Sstevel@tonic-gate 			return (0);
92*0Sstevel@tonic-gate 		}
93*0Sstevel@tonic-gate 	}
94*0Sstevel@tonic-gate 
95*0Sstevel@tonic-gate 	/* Discard from the free list until we've freed enough memory. */
96*0Sstevel@tonic-gate 	total = 0;
97*0Sstevel@tonic-gate 	for (bhp =
98*0Sstevel@tonic-gate 	    SH_TAILQ_FIRST(&mp->bhfq, __bh); bhp != NULL; bhp = nbhp) {
99*0Sstevel@tonic-gate 		nbhp = SH_TAILQ_NEXT(bhp, q, __bh);
100*0Sstevel@tonic-gate 
101*0Sstevel@tonic-gate 		SH_TAILQ_REMOVE(&mp->bhfq, bhp, q, __bh);
102*0Sstevel@tonic-gate 		__db_shalloc_free(dbmp->addr, bhp);
103*0Sstevel@tonic-gate 		--mp->stat.st_page_clean;
104*0Sstevel@tonic-gate 
105*0Sstevel@tonic-gate 		/*
106*0Sstevel@tonic-gate 		 * Retry as soon as we've freed up sufficient space.  If we
107*0Sstevel@tonic-gate 		 * will have to coalesce memory to satisfy the request, don't
108*0Sstevel@tonic-gate 		 * try until it's likely (possible?) that we'll succeed.
109*0Sstevel@tonic-gate 		 */
110*0Sstevel@tonic-gate 		total += fsize = __db_shsizeof(bhp);
111*0Sstevel@tonic-gate 		if (fsize >= len || total >= 3 * len)
112*0Sstevel@tonic-gate 			goto alloc;
113*0Sstevel@tonic-gate 	}
114*0Sstevel@tonic-gate 
115*0Sstevel@tonic-gate retry:	/* Find a buffer we can flush; pure LRU. */
116*0Sstevel@tonic-gate 	restart = total = 0;
117*0Sstevel@tonic-gate 	for (bhp =
118*0Sstevel@tonic-gate 	    SH_TAILQ_FIRST(&mp->bhq, __bh); bhp != NULL; bhp = nbhp) {
119*0Sstevel@tonic-gate 		nbhp = SH_TAILQ_NEXT(bhp, q, __bh);
120*0Sstevel@tonic-gate 
121*0Sstevel@tonic-gate 		/* Ignore pinned or locked (I/O in progress) buffers. */
122*0Sstevel@tonic-gate 		if (bhp->ref != 0 || F_ISSET(bhp, BH_LOCKED))
123*0Sstevel@tonic-gate 			continue;
124*0Sstevel@tonic-gate 
125*0Sstevel@tonic-gate 		/* Find the associated MPOOLFILE. */
126*0Sstevel@tonic-gate 		mfp = R_ADDR(dbmp, bhp->mf_offset);
127*0Sstevel@tonic-gate 
128*0Sstevel@tonic-gate 		/*
129*0Sstevel@tonic-gate 		 * Write the page if it's dirty.
130*0Sstevel@tonic-gate 		 *
131*0Sstevel@tonic-gate 		 * If we wrote the page, fall through and free the buffer.  We
132*0Sstevel@tonic-gate 		 * don't have to rewalk the list to acquire the buffer because
133*0Sstevel@tonic-gate 		 * it was never available for any other process to modify it.
134*0Sstevel@tonic-gate 		 * If we didn't write the page, but we discarded and reacquired
135*0Sstevel@tonic-gate 		 * the region lock, restart the buffer list walk.  If we neither
136*0Sstevel@tonic-gate 		 * wrote the buffer nor discarded the region lock, continue down
137*0Sstevel@tonic-gate 		 * the buffer list.
138*0Sstevel@tonic-gate 		 */
139*0Sstevel@tonic-gate 		if (F_ISSET(bhp, BH_DIRTY)) {
140*0Sstevel@tonic-gate 			++bhp->ref;
141*0Sstevel@tonic-gate 			if ((ret = __memp_bhwrite(dbmp,
142*0Sstevel@tonic-gate 			    mfp, bhp, &restart, &wrote)) != 0)
143*0Sstevel@tonic-gate 				return (ret);
144*0Sstevel@tonic-gate 			--bhp->ref;
145*0Sstevel@tonic-gate 
146*0Sstevel@tonic-gate 			/*
147*0Sstevel@tonic-gate 			 * It's possible that another process wants this buffer
148*0Sstevel@tonic-gate 			 * and incremented the ref count while we were writing
149*0Sstevel@tonic-gate 			 * it.
150*0Sstevel@tonic-gate 			 */
151*0Sstevel@tonic-gate 			if (bhp->ref != 0)
152*0Sstevel@tonic-gate 				goto retry;
153*0Sstevel@tonic-gate 
154*0Sstevel@tonic-gate 			if (wrote)
155*0Sstevel@tonic-gate 				++mp->stat.st_rw_evict;
156*0Sstevel@tonic-gate 			else {
157*0Sstevel@tonic-gate 				if (restart)
158*0Sstevel@tonic-gate 					goto retry;
159*0Sstevel@tonic-gate 				continue;
160*0Sstevel@tonic-gate 			}
161*0Sstevel@tonic-gate 		} else
162*0Sstevel@tonic-gate 			++mp->stat.st_ro_evict;
163*0Sstevel@tonic-gate 
164*0Sstevel@tonic-gate 		/*
165*0Sstevel@tonic-gate 		 * Check to see if the buffer is the size we're looking for.
166*0Sstevel@tonic-gate 		 * If it is, simply reuse it.
167*0Sstevel@tonic-gate 		 */
168*0Sstevel@tonic-gate 		total += fsize = __db_shsizeof(bhp);
169*0Sstevel@tonic-gate 		if (fsize == len) {
170*0Sstevel@tonic-gate 			__memp_bhfree(dbmp, mfp, bhp, 0);
171*0Sstevel@tonic-gate 
172*0Sstevel@tonic-gate 			if (offsetp != NULL)
173*0Sstevel@tonic-gate 				*offsetp = R_OFFSET(dbmp, bhp);
174*0Sstevel@tonic-gate 			*(void **)retp = bhp;
175*0Sstevel@tonic-gate 			return (0);
176*0Sstevel@tonic-gate 		}
177*0Sstevel@tonic-gate 
178*0Sstevel@tonic-gate 		/* Free the buffer. */
179*0Sstevel@tonic-gate 		__memp_bhfree(dbmp, mfp, bhp, 1);
180*0Sstevel@tonic-gate 
181*0Sstevel@tonic-gate 		/*
182*0Sstevel@tonic-gate 		 * Retry as soon as we've freed up sufficient space.  If we
183*0Sstevel@tonic-gate 		 * have to coalesce of memory to satisfy the request, don't
184*0Sstevel@tonic-gate 		 * try until it's likely (possible?) that we'll succeed.
185*0Sstevel@tonic-gate 		 */
186*0Sstevel@tonic-gate 		if (fsize >= len || total >= 3 * len)
187*0Sstevel@tonic-gate 			goto alloc;
188*0Sstevel@tonic-gate 
189*0Sstevel@tonic-gate 		/* Restart the walk if we discarded the region lock. */
190*0Sstevel@tonic-gate 		if (restart)
191*0Sstevel@tonic-gate 			goto retry;
192*0Sstevel@tonic-gate 	}
193*0Sstevel@tonic-gate 	nomore = 1;
194*0Sstevel@tonic-gate 	goto alloc;
195*0Sstevel@tonic-gate }
196*0Sstevel@tonic-gate 
197*0Sstevel@tonic-gate /*
198*0Sstevel@tonic-gate  * __memp_ropen --
199*0Sstevel@tonic-gate  *	Attach to, and optionally create, the mpool region.
200*0Sstevel@tonic-gate  *
201*0Sstevel@tonic-gate  * PUBLIC: int __memp_ropen
202*0Sstevel@tonic-gate  * PUBLIC:    __P((DB_MPOOL *, const char *, size_t, int, int, u_int32_t));
203*0Sstevel@tonic-gate  */
204*0Sstevel@tonic-gate int
__memp_ropen(dbmp,path,cachesize,mode,is_private,flags)205*0Sstevel@tonic-gate __memp_ropen(dbmp, path, cachesize, mode, is_private, flags)
206*0Sstevel@tonic-gate 	DB_MPOOL *dbmp;
207*0Sstevel@tonic-gate 	const char *path;
208*0Sstevel@tonic-gate 	size_t cachesize;
209*0Sstevel@tonic-gate 	int mode, is_private;
210*0Sstevel@tonic-gate 	u_int32_t flags;
211*0Sstevel@tonic-gate {
212*0Sstevel@tonic-gate 	MPOOL *mp;
213*0Sstevel@tonic-gate 	size_t rlen;
214*0Sstevel@tonic-gate 	int defcache, ret;
215*0Sstevel@tonic-gate 
216*0Sstevel@tonic-gate 	/*
217*0Sstevel@tonic-gate 	 * Unlike other DB subsystems, mpool can't simply grow the region
218*0Sstevel@tonic-gate 	 * because it returns pointers into the region to its clients.  To
219*0Sstevel@tonic-gate 	 * "grow" the region, we'd have to allocate a new region and then
220*0Sstevel@tonic-gate 	 * store a region number in the structures that reference regional
221*0Sstevel@tonic-gate 	 * objects.  It's reasonable that we fail regardless, as clients
222*0Sstevel@tonic-gate 	 * shouldn't have every page in the region pinned, so the only
223*0Sstevel@tonic-gate 	 * "failure" mode should be a performance penalty because we don't
224*0Sstevel@tonic-gate 	 * find a page in the cache that we'd like to have found.
225*0Sstevel@tonic-gate 	 *
226*0Sstevel@tonic-gate 	 * Up the user's cachesize by 25% to account for our overhead.
227*0Sstevel@tonic-gate 	 */
228*0Sstevel@tonic-gate 	defcache = 0;
229*0Sstevel@tonic-gate 	if (cachesize < DB_CACHESIZE_MIN)
230*0Sstevel@tonic-gate 		if (cachesize == 0) {
231*0Sstevel@tonic-gate 			defcache = 1;
232*0Sstevel@tonic-gate 			cachesize = DB_CACHESIZE_DEF;
233*0Sstevel@tonic-gate 		} else
234*0Sstevel@tonic-gate 			cachesize = DB_CACHESIZE_MIN;
235*0Sstevel@tonic-gate 	rlen = cachesize + cachesize / 4;
236*0Sstevel@tonic-gate 
237*0Sstevel@tonic-gate 	/*
238*0Sstevel@tonic-gate 	 * Map in the region.
239*0Sstevel@tonic-gate 	 *
240*0Sstevel@tonic-gate 	 * If it's a private mpool, use malloc, it's a lot faster than
241*0Sstevel@tonic-gate 	 * instantiating a region.
242*0Sstevel@tonic-gate 	 */
243*0Sstevel@tonic-gate 	dbmp->reginfo.dbenv = dbmp->dbenv;
244*0Sstevel@tonic-gate 	dbmp->reginfo.appname = DB_APP_NONE;
245*0Sstevel@tonic-gate 	if (path == NULL)
246*0Sstevel@tonic-gate 		dbmp->reginfo.path = NULL;
247*0Sstevel@tonic-gate 	else
248*0Sstevel@tonic-gate 		if ((ret = __os_strdup(path, &dbmp->reginfo.path)) != 0)
249*0Sstevel@tonic-gate 			return (ret);
250*0Sstevel@tonic-gate 	dbmp->reginfo.file = DB_DEFAULT_MPOOL_FILE;
251*0Sstevel@tonic-gate 	dbmp->reginfo.mode = mode;
252*0Sstevel@tonic-gate 	dbmp->reginfo.size = rlen;
253*0Sstevel@tonic-gate 	dbmp->reginfo.dbflags = flags;
254*0Sstevel@tonic-gate 	dbmp->reginfo.flags = 0;
255*0Sstevel@tonic-gate 	if (defcache)
256*0Sstevel@tonic-gate 		F_SET(&dbmp->reginfo, REGION_SIZEDEF);
257*0Sstevel@tonic-gate 
258*0Sstevel@tonic-gate 	/*
259*0Sstevel@tonic-gate 	 * If we're creating a temporary region, don't use any standard
260*0Sstevel@tonic-gate 	 * naming.
261*0Sstevel@tonic-gate 	 */
262*0Sstevel@tonic-gate 	if (is_private) {
263*0Sstevel@tonic-gate 		dbmp->reginfo.appname = DB_APP_TMP;
264*0Sstevel@tonic-gate 		dbmp->reginfo.file = NULL;
265*0Sstevel@tonic-gate 		F_SET(&dbmp->reginfo, REGION_PRIVATE);
266*0Sstevel@tonic-gate 	}
267*0Sstevel@tonic-gate 
268*0Sstevel@tonic-gate 	if ((ret = __db_rattach(&dbmp->reginfo)) != 0) {
269*0Sstevel@tonic-gate 		if (dbmp->reginfo.path != NULL)
270*0Sstevel@tonic-gate 			__os_freestr(dbmp->reginfo.path);
271*0Sstevel@tonic-gate 		return (ret);
272*0Sstevel@tonic-gate 	}
273*0Sstevel@tonic-gate 
274*0Sstevel@tonic-gate 	/*
275*0Sstevel@tonic-gate 	 * The MPOOL structure is first in the region, the rest of the region
276*0Sstevel@tonic-gate 	 * is free space.
277*0Sstevel@tonic-gate 	 */
278*0Sstevel@tonic-gate 	dbmp->mp = dbmp->reginfo.addr;
279*0Sstevel@tonic-gate 	dbmp->addr = (u_int8_t *)dbmp->mp + sizeof(MPOOL);
280*0Sstevel@tonic-gate 
281*0Sstevel@tonic-gate 	/* Initialize a created region. */
282*0Sstevel@tonic-gate 	if (F_ISSET(&dbmp->reginfo, REGION_CREATED)) {
283*0Sstevel@tonic-gate 		mp = dbmp->mp;
284*0Sstevel@tonic-gate 		SH_TAILQ_INIT(&mp->bhq);
285*0Sstevel@tonic-gate 		SH_TAILQ_INIT(&mp->bhfq);
286*0Sstevel@tonic-gate 		SH_TAILQ_INIT(&mp->mpfq);
287*0Sstevel@tonic-gate 
288*0Sstevel@tonic-gate 		__db_shalloc_init(dbmp->addr, rlen - sizeof(MPOOL));
289*0Sstevel@tonic-gate 
290*0Sstevel@tonic-gate 		/*
291*0Sstevel@tonic-gate 		 * Assume we want to keep the hash chains with under 10 pages
292*0Sstevel@tonic-gate 		 * on each chain.  We don't know the pagesize in advance, and
293*0Sstevel@tonic-gate 		 * it may differ for different files.  Use a pagesize of 1K for
294*0Sstevel@tonic-gate 		 * the calculation -- we walk these chains a lot, they should
295*0Sstevel@tonic-gate 		 * be short.
296*0Sstevel@tonic-gate 		 */
297*0Sstevel@tonic-gate 		mp->htab_buckets =
298*0Sstevel@tonic-gate 		    __db_tablesize((cachesize / (1 * 1024)) / 10);
299*0Sstevel@tonic-gate 
300*0Sstevel@tonic-gate 		/* Allocate hash table space and initialize it. */
301*0Sstevel@tonic-gate 		if ((ret = __db_shalloc(dbmp->addr,
302*0Sstevel@tonic-gate 		    mp->htab_buckets * sizeof(DB_HASHTAB),
303*0Sstevel@tonic-gate 		    0, &dbmp->htab)) != 0)
304*0Sstevel@tonic-gate 			goto err;
305*0Sstevel@tonic-gate 		__db_hashinit(dbmp->htab, mp->htab_buckets);
306*0Sstevel@tonic-gate 		mp->htab = R_OFFSET(dbmp, dbmp->htab);
307*0Sstevel@tonic-gate 
308*0Sstevel@tonic-gate 		ZERO_LSN(mp->lsn);
309*0Sstevel@tonic-gate 		mp->lsn_cnt = 0;
310*0Sstevel@tonic-gate 
311*0Sstevel@tonic-gate 		memset(&mp->stat, 0, sizeof(mp->stat));
312*0Sstevel@tonic-gate 		mp->stat.st_cachesize = cachesize;
313*0Sstevel@tonic-gate 
314*0Sstevel@tonic-gate 		mp->flags = 0;
315*0Sstevel@tonic-gate 	}
316*0Sstevel@tonic-gate 
317*0Sstevel@tonic-gate 	/* Get the local hash table address. */
318*0Sstevel@tonic-gate 	dbmp->htab = R_ADDR(dbmp, dbmp->mp->htab);
319*0Sstevel@tonic-gate 
320*0Sstevel@tonic-gate 	UNLOCKREGION(dbmp);
321*0Sstevel@tonic-gate 	return (0);
322*0Sstevel@tonic-gate 
323*0Sstevel@tonic-gate err:	UNLOCKREGION(dbmp);
324*0Sstevel@tonic-gate 	(void)__db_rdetach(&dbmp->reginfo);
325*0Sstevel@tonic-gate 	if (F_ISSET(&dbmp->reginfo, REGION_CREATED))
326*0Sstevel@tonic-gate 		(void)memp_unlink(path, 1, dbmp->dbenv);
327*0Sstevel@tonic-gate 
328*0Sstevel@tonic-gate 	if (dbmp->reginfo.path != NULL)
329*0Sstevel@tonic-gate 		__os_freestr(dbmp->reginfo.path);
330*0Sstevel@tonic-gate 	return (ret);
331*0Sstevel@tonic-gate }
332