1*0Sstevel@tonic-gate /*-
2*0Sstevel@tonic-gate * See the file LICENSE for redistribution information.
3*0Sstevel@tonic-gate *
4*0Sstevel@tonic-gate * Copyright (c) 1996, 1997, 1998
5*0Sstevel@tonic-gate * Sleepycat Software. All rights reserved.
6*0Sstevel@tonic-gate */
7*0Sstevel@tonic-gate #include "config.h"
8*0Sstevel@tonic-gate
9*0Sstevel@tonic-gate #ifndef lint
10*0Sstevel@tonic-gate static const char sccsid[] = "@(#)mp_region.c 10.35 (Sleepycat) 12/11/98";
11*0Sstevel@tonic-gate #endif /* not lint */
12*0Sstevel@tonic-gate
13*0Sstevel@tonic-gate #ifndef NO_SYSTEM_INCLUDES
14*0Sstevel@tonic-gate #include <sys/types.h>
15*0Sstevel@tonic-gate
16*0Sstevel@tonic-gate #include <errno.h>
17*0Sstevel@tonic-gate #include <string.h>
18*0Sstevel@tonic-gate #endif
19*0Sstevel@tonic-gate
20*0Sstevel@tonic-gate #include "db_int.h"
21*0Sstevel@tonic-gate #include "shqueue.h"
22*0Sstevel@tonic-gate #include "db_shash.h"
23*0Sstevel@tonic-gate #include "mp.h"
24*0Sstevel@tonic-gate #include "common_ext.h"
25*0Sstevel@tonic-gate
26*0Sstevel@tonic-gate /*
27*0Sstevel@tonic-gate * __memp_reg_alloc --
28*0Sstevel@tonic-gate * Allocate some space in the mpool region, with locking.
29*0Sstevel@tonic-gate *
30*0Sstevel@tonic-gate * PUBLIC: int __memp_reg_alloc __P((DB_MPOOL *, size_t, size_t *, void *));
31*0Sstevel@tonic-gate */
32*0Sstevel@tonic-gate int
__memp_reg_alloc(dbmp,len,offsetp,retp)33*0Sstevel@tonic-gate __memp_reg_alloc(dbmp, len, offsetp, retp)
34*0Sstevel@tonic-gate DB_MPOOL *dbmp;
35*0Sstevel@tonic-gate size_t len, *offsetp;
36*0Sstevel@tonic-gate void *retp;
37*0Sstevel@tonic-gate {
38*0Sstevel@tonic-gate int ret;
39*0Sstevel@tonic-gate
40*0Sstevel@tonic-gate LOCKREGION(dbmp);
41*0Sstevel@tonic-gate ret = __memp_alloc(dbmp, len, offsetp, retp);
42*0Sstevel@tonic-gate UNLOCKREGION(dbmp);
43*0Sstevel@tonic-gate return (ret);
44*0Sstevel@tonic-gate }
45*0Sstevel@tonic-gate
46*0Sstevel@tonic-gate /*
47*0Sstevel@tonic-gate * __memp_alloc --
48*0Sstevel@tonic-gate * Allocate some space in the mpool region.
49*0Sstevel@tonic-gate *
50*0Sstevel@tonic-gate * PUBLIC: int __memp_alloc __P((DB_MPOOL *, size_t, size_t *, void *));
51*0Sstevel@tonic-gate */
52*0Sstevel@tonic-gate int
__memp_alloc(dbmp,len,offsetp,retp)53*0Sstevel@tonic-gate __memp_alloc(dbmp, len, offsetp, retp)
54*0Sstevel@tonic-gate DB_MPOOL *dbmp;
55*0Sstevel@tonic-gate size_t len, *offsetp;
56*0Sstevel@tonic-gate void *retp;
57*0Sstevel@tonic-gate {
58*0Sstevel@tonic-gate BH *bhp, *nbhp;
59*0Sstevel@tonic-gate MPOOL *mp;
60*0Sstevel@tonic-gate MPOOLFILE *mfp;
61*0Sstevel@tonic-gate size_t fsize, total;
62*0Sstevel@tonic-gate int nomore, restart, ret, wrote;
63*0Sstevel@tonic-gate void *p;
64*0Sstevel@tonic-gate
65*0Sstevel@tonic-gate mp = dbmp->mp;
66*0Sstevel@tonic-gate
67*0Sstevel@tonic-gate nomore = 0;
68*0Sstevel@tonic-gate alloc: if ((ret = __db_shalloc(dbmp->addr, len, MUTEX_ALIGNMENT, &p)) == 0) {
69*0Sstevel@tonic-gate if (offsetp != NULL)
70*0Sstevel@tonic-gate *offsetp = R_OFFSET(dbmp, p);
71*0Sstevel@tonic-gate *(void **)retp = p;
72*0Sstevel@tonic-gate return (0);
73*0Sstevel@tonic-gate }
74*0Sstevel@tonic-gate if (nomore) {
75*0Sstevel@tonic-gate __db_err(dbmp->dbenv,
76*0Sstevel@tonic-gate "Unable to allocate %lu bytes from mpool shared region: %s\n",
77*0Sstevel@tonic-gate (u_long)len, strerror(ret));
78*0Sstevel@tonic-gate return (ret);
79*0Sstevel@tonic-gate }
80*0Sstevel@tonic-gate
81*0Sstevel@tonic-gate /* Look for a buffer on the free list that's the right size. */
82*0Sstevel@tonic-gate for (bhp =
83*0Sstevel@tonic-gate SH_TAILQ_FIRST(&mp->bhfq, __bh); bhp != NULL; bhp = nbhp) {
84*0Sstevel@tonic-gate nbhp = SH_TAILQ_NEXT(bhp, q, __bh);
85*0Sstevel@tonic-gate
86*0Sstevel@tonic-gate if (__db_shsizeof(bhp) == len) {
87*0Sstevel@tonic-gate SH_TAILQ_REMOVE(&mp->bhfq, bhp, q, __bh);
88*0Sstevel@tonic-gate if (offsetp != NULL)
89*0Sstevel@tonic-gate *offsetp = R_OFFSET(dbmp, bhp);
90*0Sstevel@tonic-gate *(void **)retp = bhp;
91*0Sstevel@tonic-gate return (0);
92*0Sstevel@tonic-gate }
93*0Sstevel@tonic-gate }
94*0Sstevel@tonic-gate
95*0Sstevel@tonic-gate /* Discard from the free list until we've freed enough memory. */
96*0Sstevel@tonic-gate total = 0;
97*0Sstevel@tonic-gate for (bhp =
98*0Sstevel@tonic-gate SH_TAILQ_FIRST(&mp->bhfq, __bh); bhp != NULL; bhp = nbhp) {
99*0Sstevel@tonic-gate nbhp = SH_TAILQ_NEXT(bhp, q, __bh);
100*0Sstevel@tonic-gate
101*0Sstevel@tonic-gate SH_TAILQ_REMOVE(&mp->bhfq, bhp, q, __bh);
102*0Sstevel@tonic-gate __db_shalloc_free(dbmp->addr, bhp);
103*0Sstevel@tonic-gate --mp->stat.st_page_clean;
104*0Sstevel@tonic-gate
105*0Sstevel@tonic-gate /*
106*0Sstevel@tonic-gate * Retry as soon as we've freed up sufficient space. If we
107*0Sstevel@tonic-gate * will have to coalesce memory to satisfy the request, don't
108*0Sstevel@tonic-gate * try until it's likely (possible?) that we'll succeed.
109*0Sstevel@tonic-gate */
110*0Sstevel@tonic-gate total += fsize = __db_shsizeof(bhp);
111*0Sstevel@tonic-gate if (fsize >= len || total >= 3 * len)
112*0Sstevel@tonic-gate goto alloc;
113*0Sstevel@tonic-gate }
114*0Sstevel@tonic-gate
115*0Sstevel@tonic-gate retry: /* Find a buffer we can flush; pure LRU. */
116*0Sstevel@tonic-gate restart = total = 0;
117*0Sstevel@tonic-gate for (bhp =
118*0Sstevel@tonic-gate SH_TAILQ_FIRST(&mp->bhq, __bh); bhp != NULL; bhp = nbhp) {
119*0Sstevel@tonic-gate nbhp = SH_TAILQ_NEXT(bhp, q, __bh);
120*0Sstevel@tonic-gate
121*0Sstevel@tonic-gate /* Ignore pinned or locked (I/O in progress) buffers. */
122*0Sstevel@tonic-gate if (bhp->ref != 0 || F_ISSET(bhp, BH_LOCKED))
123*0Sstevel@tonic-gate continue;
124*0Sstevel@tonic-gate
125*0Sstevel@tonic-gate /* Find the associated MPOOLFILE. */
126*0Sstevel@tonic-gate mfp = R_ADDR(dbmp, bhp->mf_offset);
127*0Sstevel@tonic-gate
128*0Sstevel@tonic-gate /*
129*0Sstevel@tonic-gate * Write the page if it's dirty.
130*0Sstevel@tonic-gate *
131*0Sstevel@tonic-gate * If we wrote the page, fall through and free the buffer. We
132*0Sstevel@tonic-gate * don't have to rewalk the list to acquire the buffer because
133*0Sstevel@tonic-gate * it was never available for any other process to modify it.
134*0Sstevel@tonic-gate * If we didn't write the page, but we discarded and reacquired
135*0Sstevel@tonic-gate * the region lock, restart the buffer list walk. If we neither
136*0Sstevel@tonic-gate * wrote the buffer nor discarded the region lock, continue down
137*0Sstevel@tonic-gate * the buffer list.
138*0Sstevel@tonic-gate */
139*0Sstevel@tonic-gate if (F_ISSET(bhp, BH_DIRTY)) {
140*0Sstevel@tonic-gate ++bhp->ref;
141*0Sstevel@tonic-gate if ((ret = __memp_bhwrite(dbmp,
142*0Sstevel@tonic-gate mfp, bhp, &restart, &wrote)) != 0)
143*0Sstevel@tonic-gate return (ret);
144*0Sstevel@tonic-gate --bhp->ref;
145*0Sstevel@tonic-gate
146*0Sstevel@tonic-gate /*
147*0Sstevel@tonic-gate * It's possible that another process wants this buffer
148*0Sstevel@tonic-gate * and incremented the ref count while we were writing
149*0Sstevel@tonic-gate * it.
150*0Sstevel@tonic-gate */
151*0Sstevel@tonic-gate if (bhp->ref != 0)
152*0Sstevel@tonic-gate goto retry;
153*0Sstevel@tonic-gate
154*0Sstevel@tonic-gate if (wrote)
155*0Sstevel@tonic-gate ++mp->stat.st_rw_evict;
156*0Sstevel@tonic-gate else {
157*0Sstevel@tonic-gate if (restart)
158*0Sstevel@tonic-gate goto retry;
159*0Sstevel@tonic-gate continue;
160*0Sstevel@tonic-gate }
161*0Sstevel@tonic-gate } else
162*0Sstevel@tonic-gate ++mp->stat.st_ro_evict;
163*0Sstevel@tonic-gate
164*0Sstevel@tonic-gate /*
165*0Sstevel@tonic-gate * Check to see if the buffer is the size we're looking for.
166*0Sstevel@tonic-gate * If it is, simply reuse it.
167*0Sstevel@tonic-gate */
168*0Sstevel@tonic-gate total += fsize = __db_shsizeof(bhp);
169*0Sstevel@tonic-gate if (fsize == len) {
170*0Sstevel@tonic-gate __memp_bhfree(dbmp, mfp, bhp, 0);
171*0Sstevel@tonic-gate
172*0Sstevel@tonic-gate if (offsetp != NULL)
173*0Sstevel@tonic-gate *offsetp = R_OFFSET(dbmp, bhp);
174*0Sstevel@tonic-gate *(void **)retp = bhp;
175*0Sstevel@tonic-gate return (0);
176*0Sstevel@tonic-gate }
177*0Sstevel@tonic-gate
178*0Sstevel@tonic-gate /* Free the buffer. */
179*0Sstevel@tonic-gate __memp_bhfree(dbmp, mfp, bhp, 1);
180*0Sstevel@tonic-gate
181*0Sstevel@tonic-gate /*
182*0Sstevel@tonic-gate * Retry as soon as we've freed up sufficient space. If we
183*0Sstevel@tonic-gate * have to coalesce of memory to satisfy the request, don't
184*0Sstevel@tonic-gate * try until it's likely (possible?) that we'll succeed.
185*0Sstevel@tonic-gate */
186*0Sstevel@tonic-gate if (fsize >= len || total >= 3 * len)
187*0Sstevel@tonic-gate goto alloc;
188*0Sstevel@tonic-gate
189*0Sstevel@tonic-gate /* Restart the walk if we discarded the region lock. */
190*0Sstevel@tonic-gate if (restart)
191*0Sstevel@tonic-gate goto retry;
192*0Sstevel@tonic-gate }
193*0Sstevel@tonic-gate nomore = 1;
194*0Sstevel@tonic-gate goto alloc;
195*0Sstevel@tonic-gate }
196*0Sstevel@tonic-gate
197*0Sstevel@tonic-gate /*
198*0Sstevel@tonic-gate * __memp_ropen --
199*0Sstevel@tonic-gate * Attach to, and optionally create, the mpool region.
200*0Sstevel@tonic-gate *
201*0Sstevel@tonic-gate * PUBLIC: int __memp_ropen
202*0Sstevel@tonic-gate * PUBLIC: __P((DB_MPOOL *, const char *, size_t, int, int, u_int32_t));
203*0Sstevel@tonic-gate */
204*0Sstevel@tonic-gate int
__memp_ropen(dbmp,path,cachesize,mode,is_private,flags)205*0Sstevel@tonic-gate __memp_ropen(dbmp, path, cachesize, mode, is_private, flags)
206*0Sstevel@tonic-gate DB_MPOOL *dbmp;
207*0Sstevel@tonic-gate const char *path;
208*0Sstevel@tonic-gate size_t cachesize;
209*0Sstevel@tonic-gate int mode, is_private;
210*0Sstevel@tonic-gate u_int32_t flags;
211*0Sstevel@tonic-gate {
212*0Sstevel@tonic-gate MPOOL *mp;
213*0Sstevel@tonic-gate size_t rlen;
214*0Sstevel@tonic-gate int defcache, ret;
215*0Sstevel@tonic-gate
216*0Sstevel@tonic-gate /*
217*0Sstevel@tonic-gate * Unlike other DB subsystems, mpool can't simply grow the region
218*0Sstevel@tonic-gate * because it returns pointers into the region to its clients. To
219*0Sstevel@tonic-gate * "grow" the region, we'd have to allocate a new region and then
220*0Sstevel@tonic-gate * store a region number in the structures that reference regional
221*0Sstevel@tonic-gate * objects. It's reasonable that we fail regardless, as clients
222*0Sstevel@tonic-gate * shouldn't have every page in the region pinned, so the only
223*0Sstevel@tonic-gate * "failure" mode should be a performance penalty because we don't
224*0Sstevel@tonic-gate * find a page in the cache that we'd like to have found.
225*0Sstevel@tonic-gate *
226*0Sstevel@tonic-gate * Up the user's cachesize by 25% to account for our overhead.
227*0Sstevel@tonic-gate */
228*0Sstevel@tonic-gate defcache = 0;
229*0Sstevel@tonic-gate if (cachesize < DB_CACHESIZE_MIN)
230*0Sstevel@tonic-gate if (cachesize == 0) {
231*0Sstevel@tonic-gate defcache = 1;
232*0Sstevel@tonic-gate cachesize = DB_CACHESIZE_DEF;
233*0Sstevel@tonic-gate } else
234*0Sstevel@tonic-gate cachesize = DB_CACHESIZE_MIN;
235*0Sstevel@tonic-gate rlen = cachesize + cachesize / 4;
236*0Sstevel@tonic-gate
237*0Sstevel@tonic-gate /*
238*0Sstevel@tonic-gate * Map in the region.
239*0Sstevel@tonic-gate *
240*0Sstevel@tonic-gate * If it's a private mpool, use malloc, it's a lot faster than
241*0Sstevel@tonic-gate * instantiating a region.
242*0Sstevel@tonic-gate */
243*0Sstevel@tonic-gate dbmp->reginfo.dbenv = dbmp->dbenv;
244*0Sstevel@tonic-gate dbmp->reginfo.appname = DB_APP_NONE;
245*0Sstevel@tonic-gate if (path == NULL)
246*0Sstevel@tonic-gate dbmp->reginfo.path = NULL;
247*0Sstevel@tonic-gate else
248*0Sstevel@tonic-gate if ((ret = __os_strdup(path, &dbmp->reginfo.path)) != 0)
249*0Sstevel@tonic-gate return (ret);
250*0Sstevel@tonic-gate dbmp->reginfo.file = DB_DEFAULT_MPOOL_FILE;
251*0Sstevel@tonic-gate dbmp->reginfo.mode = mode;
252*0Sstevel@tonic-gate dbmp->reginfo.size = rlen;
253*0Sstevel@tonic-gate dbmp->reginfo.dbflags = flags;
254*0Sstevel@tonic-gate dbmp->reginfo.flags = 0;
255*0Sstevel@tonic-gate if (defcache)
256*0Sstevel@tonic-gate F_SET(&dbmp->reginfo, REGION_SIZEDEF);
257*0Sstevel@tonic-gate
258*0Sstevel@tonic-gate /*
259*0Sstevel@tonic-gate * If we're creating a temporary region, don't use any standard
260*0Sstevel@tonic-gate * naming.
261*0Sstevel@tonic-gate */
262*0Sstevel@tonic-gate if (is_private) {
263*0Sstevel@tonic-gate dbmp->reginfo.appname = DB_APP_TMP;
264*0Sstevel@tonic-gate dbmp->reginfo.file = NULL;
265*0Sstevel@tonic-gate F_SET(&dbmp->reginfo, REGION_PRIVATE);
266*0Sstevel@tonic-gate }
267*0Sstevel@tonic-gate
268*0Sstevel@tonic-gate if ((ret = __db_rattach(&dbmp->reginfo)) != 0) {
269*0Sstevel@tonic-gate if (dbmp->reginfo.path != NULL)
270*0Sstevel@tonic-gate __os_freestr(dbmp->reginfo.path);
271*0Sstevel@tonic-gate return (ret);
272*0Sstevel@tonic-gate }
273*0Sstevel@tonic-gate
274*0Sstevel@tonic-gate /*
275*0Sstevel@tonic-gate * The MPOOL structure is first in the region, the rest of the region
276*0Sstevel@tonic-gate * is free space.
277*0Sstevel@tonic-gate */
278*0Sstevel@tonic-gate dbmp->mp = dbmp->reginfo.addr;
279*0Sstevel@tonic-gate dbmp->addr = (u_int8_t *)dbmp->mp + sizeof(MPOOL);
280*0Sstevel@tonic-gate
281*0Sstevel@tonic-gate /* Initialize a created region. */
282*0Sstevel@tonic-gate if (F_ISSET(&dbmp->reginfo, REGION_CREATED)) {
283*0Sstevel@tonic-gate mp = dbmp->mp;
284*0Sstevel@tonic-gate SH_TAILQ_INIT(&mp->bhq);
285*0Sstevel@tonic-gate SH_TAILQ_INIT(&mp->bhfq);
286*0Sstevel@tonic-gate SH_TAILQ_INIT(&mp->mpfq);
287*0Sstevel@tonic-gate
288*0Sstevel@tonic-gate __db_shalloc_init(dbmp->addr, rlen - sizeof(MPOOL));
289*0Sstevel@tonic-gate
290*0Sstevel@tonic-gate /*
291*0Sstevel@tonic-gate * Assume we want to keep the hash chains with under 10 pages
292*0Sstevel@tonic-gate * on each chain. We don't know the pagesize in advance, and
293*0Sstevel@tonic-gate * it may differ for different files. Use a pagesize of 1K for
294*0Sstevel@tonic-gate * the calculation -- we walk these chains a lot, they should
295*0Sstevel@tonic-gate * be short.
296*0Sstevel@tonic-gate */
297*0Sstevel@tonic-gate mp->htab_buckets =
298*0Sstevel@tonic-gate __db_tablesize((cachesize / (1 * 1024)) / 10);
299*0Sstevel@tonic-gate
300*0Sstevel@tonic-gate /* Allocate hash table space and initialize it. */
301*0Sstevel@tonic-gate if ((ret = __db_shalloc(dbmp->addr,
302*0Sstevel@tonic-gate mp->htab_buckets * sizeof(DB_HASHTAB),
303*0Sstevel@tonic-gate 0, &dbmp->htab)) != 0)
304*0Sstevel@tonic-gate goto err;
305*0Sstevel@tonic-gate __db_hashinit(dbmp->htab, mp->htab_buckets);
306*0Sstevel@tonic-gate mp->htab = R_OFFSET(dbmp, dbmp->htab);
307*0Sstevel@tonic-gate
308*0Sstevel@tonic-gate ZERO_LSN(mp->lsn);
309*0Sstevel@tonic-gate mp->lsn_cnt = 0;
310*0Sstevel@tonic-gate
311*0Sstevel@tonic-gate memset(&mp->stat, 0, sizeof(mp->stat));
312*0Sstevel@tonic-gate mp->stat.st_cachesize = cachesize;
313*0Sstevel@tonic-gate
314*0Sstevel@tonic-gate mp->flags = 0;
315*0Sstevel@tonic-gate }
316*0Sstevel@tonic-gate
317*0Sstevel@tonic-gate /* Get the local hash table address. */
318*0Sstevel@tonic-gate dbmp->htab = R_ADDR(dbmp, dbmp->mp->htab);
319*0Sstevel@tonic-gate
320*0Sstevel@tonic-gate UNLOCKREGION(dbmp);
321*0Sstevel@tonic-gate return (0);
322*0Sstevel@tonic-gate
323*0Sstevel@tonic-gate err: UNLOCKREGION(dbmp);
324*0Sstevel@tonic-gate (void)__db_rdetach(&dbmp->reginfo);
325*0Sstevel@tonic-gate if (F_ISSET(&dbmp->reginfo, REGION_CREATED))
326*0Sstevel@tonic-gate (void)memp_unlink(path, 1, dbmp->dbenv);
327*0Sstevel@tonic-gate
328*0Sstevel@tonic-gate if (dbmp->reginfo.path != NULL)
329*0Sstevel@tonic-gate __os_freestr(dbmp->reginfo.path);
330*0Sstevel@tonic-gate return (ret);
331*0Sstevel@tonic-gate }
332