1*0Sstevel@tonic-gate /*-
2*0Sstevel@tonic-gate * See the file LICENSE for redistribution information.
3*0Sstevel@tonic-gate *
4*0Sstevel@tonic-gate * Copyright (c) 1996, 1997, 1998
5*0Sstevel@tonic-gate * Sleepycat Software. All rights reserved.
6*0Sstevel@tonic-gate */
7*0Sstevel@tonic-gate #include "config.h"
8*0Sstevel@tonic-gate
9*0Sstevel@tonic-gate #ifndef lint
10*0Sstevel@tonic-gate static const char sccsid[] = "@(#)mp_fopen.c 10.60 (Sleepycat) 1/1/99";
11*0Sstevel@tonic-gate #endif /* not lint */
12*0Sstevel@tonic-gate
13*0Sstevel@tonic-gate #ifndef NO_SYSTEM_INCLUDES
14*0Sstevel@tonic-gate #include <sys/types.h>
15*0Sstevel@tonic-gate
16*0Sstevel@tonic-gate #include <errno.h>
17*0Sstevel@tonic-gate #include <string.h>
18*0Sstevel@tonic-gate #endif
19*0Sstevel@tonic-gate
20*0Sstevel@tonic-gate #include "db_int.h"
21*0Sstevel@tonic-gate #include "shqueue.h"
22*0Sstevel@tonic-gate #include "db_shash.h"
23*0Sstevel@tonic-gate #include "mp.h"
24*0Sstevel@tonic-gate #include "common_ext.h"
25*0Sstevel@tonic-gate
26*0Sstevel@tonic-gate static int __memp_mf_close __P((DB_MPOOL *, DB_MPOOLFILE *));
27*0Sstevel@tonic-gate static int __memp_mf_open __P((DB_MPOOL *,
28*0Sstevel@tonic-gate const char *, size_t, db_pgno_t, DB_MPOOL_FINFO *, MPOOLFILE **));
29*0Sstevel@tonic-gate
30*0Sstevel@tonic-gate /*
31*0Sstevel@tonic-gate * memp_fopen --
32*0Sstevel@tonic-gate * Open a backing file for the memory pool.
33*0Sstevel@tonic-gate */
34*0Sstevel@tonic-gate int
memp_fopen(dbmp,path,flags,mode,pagesize,finfop,retp)35*0Sstevel@tonic-gate memp_fopen(dbmp, path, flags, mode, pagesize, finfop, retp)
36*0Sstevel@tonic-gate DB_MPOOL *dbmp;
37*0Sstevel@tonic-gate const char *path;
38*0Sstevel@tonic-gate u_int32_t flags;
39*0Sstevel@tonic-gate int mode;
40*0Sstevel@tonic-gate size_t pagesize;
41*0Sstevel@tonic-gate DB_MPOOL_FINFO *finfop;
42*0Sstevel@tonic-gate DB_MPOOLFILE **retp;
43*0Sstevel@tonic-gate {
44*0Sstevel@tonic-gate int ret;
45*0Sstevel@tonic-gate
46*0Sstevel@tonic-gate MP_PANIC_CHECK(dbmp);
47*0Sstevel@tonic-gate
48*0Sstevel@tonic-gate /* Validate arguments. */
49*0Sstevel@tonic-gate if ((ret = __db_fchk(dbmp->dbenv,
50*0Sstevel@tonic-gate "memp_fopen", flags, DB_CREATE | DB_NOMMAP | DB_RDONLY)) != 0)
51*0Sstevel@tonic-gate return (ret);
52*0Sstevel@tonic-gate
53*0Sstevel@tonic-gate /* Require a non-zero pagesize. */
54*0Sstevel@tonic-gate if (pagesize == 0) {
55*0Sstevel@tonic-gate __db_err(dbmp->dbenv, "memp_fopen: pagesize not specified");
56*0Sstevel@tonic-gate return (EINVAL);
57*0Sstevel@tonic-gate }
58*0Sstevel@tonic-gate if (finfop != NULL && finfop->clear_len > pagesize)
59*0Sstevel@tonic-gate return (EINVAL);
60*0Sstevel@tonic-gate
61*0Sstevel@tonic-gate return (__memp_fopen(dbmp,
62*0Sstevel@tonic-gate NULL, path, flags, mode, pagesize, 1, finfop, retp));
63*0Sstevel@tonic-gate }
64*0Sstevel@tonic-gate
65*0Sstevel@tonic-gate /*
66*0Sstevel@tonic-gate * __memp_fopen --
67*0Sstevel@tonic-gate * Open a backing file for the memory pool; internal version.
68*0Sstevel@tonic-gate *
69*0Sstevel@tonic-gate * PUBLIC: int __memp_fopen __P((DB_MPOOL *, MPOOLFILE *, const char *,
70*0Sstevel@tonic-gate * PUBLIC: u_int32_t, int, size_t, int, DB_MPOOL_FINFO *, DB_MPOOLFILE **));
71*0Sstevel@tonic-gate */
72*0Sstevel@tonic-gate int
__memp_fopen(dbmp,mfp,path,flags,mode,pagesize,needlock,finfop,retp)73*0Sstevel@tonic-gate __memp_fopen(dbmp, mfp, path, flags, mode, pagesize, needlock, finfop, retp)
74*0Sstevel@tonic-gate DB_MPOOL *dbmp;
75*0Sstevel@tonic-gate MPOOLFILE *mfp;
76*0Sstevel@tonic-gate const char *path;
77*0Sstevel@tonic-gate u_int32_t flags;
78*0Sstevel@tonic-gate int mode, needlock;
79*0Sstevel@tonic-gate size_t pagesize;
80*0Sstevel@tonic-gate DB_MPOOL_FINFO *finfop;
81*0Sstevel@tonic-gate DB_MPOOLFILE **retp;
82*0Sstevel@tonic-gate {
83*0Sstevel@tonic-gate DB_ENV *dbenv;
84*0Sstevel@tonic-gate DB_MPOOLFILE *dbmfp;
85*0Sstevel@tonic-gate DB_MPOOL_FINFO finfo;
86*0Sstevel@tonic-gate db_pgno_t last_pgno;
87*0Sstevel@tonic-gate size_t maxmap;
88*0Sstevel@tonic-gate u_int32_t mbytes, bytes;
89*0Sstevel@tonic-gate int ret;
90*0Sstevel@tonic-gate u_int8_t idbuf[DB_FILE_ID_LEN];
91*0Sstevel@tonic-gate char *rpath;
92*0Sstevel@tonic-gate
93*0Sstevel@tonic-gate dbenv = dbmp->dbenv;
94*0Sstevel@tonic-gate ret = 0;
95*0Sstevel@tonic-gate rpath = NULL;
96*0Sstevel@tonic-gate
97*0Sstevel@tonic-gate /*
98*0Sstevel@tonic-gate * If mfp is provided, we take the DB_MPOOL_FINFO information from
99*0Sstevel@tonic-gate * the mfp. We don't bother initializing everything, because some
100*0Sstevel@tonic-gate * of them are expensive to acquire. If no mfp is provided and the
101*0Sstevel@tonic-gate * finfop argument is NULL, we default the values.
102*0Sstevel@tonic-gate */
103*0Sstevel@tonic-gate if (finfop == NULL) {
104*0Sstevel@tonic-gate memset(&finfo, 0, sizeof(finfo));
105*0Sstevel@tonic-gate if (mfp != NULL) {
106*0Sstevel@tonic-gate finfo.ftype = mfp->ftype;
107*0Sstevel@tonic-gate finfo.pgcookie = NULL;
108*0Sstevel@tonic-gate finfo.fileid = NULL;
109*0Sstevel@tonic-gate finfo.lsn_offset = mfp->lsn_off;
110*0Sstevel@tonic-gate finfo.clear_len = mfp->clear_len;
111*0Sstevel@tonic-gate } else {
112*0Sstevel@tonic-gate finfo.ftype = 0;
113*0Sstevel@tonic-gate finfo.pgcookie = NULL;
114*0Sstevel@tonic-gate finfo.fileid = NULL;
115*0Sstevel@tonic-gate finfo.lsn_offset = -1;
116*0Sstevel@tonic-gate finfo.clear_len = 0;
117*0Sstevel@tonic-gate }
118*0Sstevel@tonic-gate finfop = &finfo;
119*0Sstevel@tonic-gate }
120*0Sstevel@tonic-gate
121*0Sstevel@tonic-gate /* Allocate and initialize the per-process structure. */
122*0Sstevel@tonic-gate if ((ret = __os_calloc(1, sizeof(DB_MPOOLFILE), &dbmfp)) != 0)
123*0Sstevel@tonic-gate return (ret);
124*0Sstevel@tonic-gate dbmfp->dbmp = dbmp;
125*0Sstevel@tonic-gate dbmfp->fd = -1;
126*0Sstevel@tonic-gate dbmfp->ref = 1;
127*0Sstevel@tonic-gate if (LF_ISSET(DB_RDONLY))
128*0Sstevel@tonic-gate F_SET(dbmfp, MP_READONLY);
129*0Sstevel@tonic-gate
130*0Sstevel@tonic-gate if (path == NULL) {
131*0Sstevel@tonic-gate if (LF_ISSET(DB_RDONLY)) {
132*0Sstevel@tonic-gate __db_err(dbenv,
133*0Sstevel@tonic-gate "memp_fopen: temporary files can't be readonly");
134*0Sstevel@tonic-gate ret = EINVAL;
135*0Sstevel@tonic-gate goto err;
136*0Sstevel@tonic-gate }
137*0Sstevel@tonic-gate last_pgno = 0;
138*0Sstevel@tonic-gate } else {
139*0Sstevel@tonic-gate /* Get the real name for this file and open it. */
140*0Sstevel@tonic-gate if ((ret = __db_appname(dbenv,
141*0Sstevel@tonic-gate DB_APP_DATA, NULL, path, 0, NULL, &rpath)) != 0)
142*0Sstevel@tonic-gate goto err;
143*0Sstevel@tonic-gate if ((ret = __db_open(rpath,
144*0Sstevel@tonic-gate LF_ISSET(DB_CREATE | DB_RDONLY),
145*0Sstevel@tonic-gate DB_CREATE | DB_RDONLY, mode, &dbmfp->fd)) != 0) {
146*0Sstevel@tonic-gate __db_err(dbenv, "%s: %s", rpath, strerror(ret));
147*0Sstevel@tonic-gate goto err;
148*0Sstevel@tonic-gate }
149*0Sstevel@tonic-gate
150*0Sstevel@tonic-gate /*
151*0Sstevel@tonic-gate * Don't permit files that aren't a multiple of the pagesize,
152*0Sstevel@tonic-gate * and find the number of the last page in the file, all the
153*0Sstevel@tonic-gate * time being careful not to overflow 32 bits.
154*0Sstevel@tonic-gate *
155*0Sstevel@tonic-gate * !!!
156*0Sstevel@tonic-gate * We can't use off_t's here, or in any code in the mainline
157*0Sstevel@tonic-gate * library for that matter. (We have to use them in the os
158*0Sstevel@tonic-gate * stubs, of course, as there are system calls that take them
159*0Sstevel@tonic-gate * as arguments.) The reason is that some customers build in
160*0Sstevel@tonic-gate * environments where an off_t is 32-bits, but still run where
161*0Sstevel@tonic-gate * offsets are 64-bits, and they pay us a lot of money.
162*0Sstevel@tonic-gate */
163*0Sstevel@tonic-gate if ((ret = __os_ioinfo(rpath,
164*0Sstevel@tonic-gate dbmfp->fd, &mbytes, &bytes, NULL)) != 0) {
165*0Sstevel@tonic-gate __db_err(dbenv, "%s: %s", rpath, strerror(ret));
166*0Sstevel@tonic-gate goto err;
167*0Sstevel@tonic-gate }
168*0Sstevel@tonic-gate
169*0Sstevel@tonic-gate /* Page sizes have to be a power-of-two, ignore mbytes. */
170*0Sstevel@tonic-gate if (bytes % pagesize != 0) {
171*0Sstevel@tonic-gate __db_err(dbenv,
172*0Sstevel@tonic-gate "%s: file size not a multiple of the pagesize",
173*0Sstevel@tonic-gate rpath);
174*0Sstevel@tonic-gate ret = EINVAL;
175*0Sstevel@tonic-gate goto err;
176*0Sstevel@tonic-gate }
177*0Sstevel@tonic-gate
178*0Sstevel@tonic-gate last_pgno = mbytes * (MEGABYTE / pagesize);
179*0Sstevel@tonic-gate last_pgno += bytes / pagesize;
180*0Sstevel@tonic-gate
181*0Sstevel@tonic-gate /* Correction: page numbers are zero-based, not 1-based. */
182*0Sstevel@tonic-gate if (last_pgno != 0)
183*0Sstevel@tonic-gate --last_pgno;
184*0Sstevel@tonic-gate
185*0Sstevel@tonic-gate /*
186*0Sstevel@tonic-gate * Get the file id if we weren't given one. Generated file id's
187*0Sstevel@tonic-gate * don't use timestamps, otherwise there'd be no chance of any
188*0Sstevel@tonic-gate * other process joining the party.
189*0Sstevel@tonic-gate */
190*0Sstevel@tonic-gate if (finfop->fileid == NULL) {
191*0Sstevel@tonic-gate if ((ret = __os_fileid(dbenv, rpath, 0, idbuf)) != 0)
192*0Sstevel@tonic-gate goto err;
193*0Sstevel@tonic-gate finfop->fileid = idbuf;
194*0Sstevel@tonic-gate }
195*0Sstevel@tonic-gate }
196*0Sstevel@tonic-gate
197*0Sstevel@tonic-gate /*
198*0Sstevel@tonic-gate * If we weren't provided an underlying shared object to join with,
199*0Sstevel@tonic-gate * find/allocate the shared file objects. Also allocate space for
200*0Sstevel@tonic-gate * for the per-process thread lock.
201*0Sstevel@tonic-gate */
202*0Sstevel@tonic-gate if (needlock)
203*0Sstevel@tonic-gate LOCKREGION(dbmp);
204*0Sstevel@tonic-gate
205*0Sstevel@tonic-gate if (mfp == NULL)
206*0Sstevel@tonic-gate ret = __memp_mf_open(dbmp,
207*0Sstevel@tonic-gate path, pagesize, last_pgno, finfop, &mfp);
208*0Sstevel@tonic-gate else {
209*0Sstevel@tonic-gate ++mfp->ref;
210*0Sstevel@tonic-gate ret = 0;
211*0Sstevel@tonic-gate }
212*0Sstevel@tonic-gate if (ret == 0 &&
213*0Sstevel@tonic-gate F_ISSET(dbmp, MP_LOCKHANDLE) && (ret =
214*0Sstevel@tonic-gate __memp_alloc(dbmp, sizeof(db_mutex_t), NULL, &dbmfp->mutexp)) == 0)
215*0Sstevel@tonic-gate LOCKINIT(dbmp, dbmfp->mutexp);
216*0Sstevel@tonic-gate
217*0Sstevel@tonic-gate if (needlock)
218*0Sstevel@tonic-gate UNLOCKREGION(dbmp);
219*0Sstevel@tonic-gate if (ret != 0)
220*0Sstevel@tonic-gate goto err;
221*0Sstevel@tonic-gate
222*0Sstevel@tonic-gate dbmfp->mfp = mfp;
223*0Sstevel@tonic-gate
224*0Sstevel@tonic-gate /*
225*0Sstevel@tonic-gate * If a file:
226*0Sstevel@tonic-gate * + is read-only
227*0Sstevel@tonic-gate * + isn't temporary
228*0Sstevel@tonic-gate * + doesn't require any pgin/pgout support
229*0Sstevel@tonic-gate * + the DB_NOMMAP flag wasn't set
230*0Sstevel@tonic-gate * + and is less than mp_mmapsize bytes in size
231*0Sstevel@tonic-gate *
232*0Sstevel@tonic-gate * we can mmap it instead of reading/writing buffers. Don't do error
233*0Sstevel@tonic-gate * checking based on the mmap call failure. We want to do normal I/O
234*0Sstevel@tonic-gate * on the file if the reason we failed was because the file was on an
235*0Sstevel@tonic-gate * NFS mounted partition, and we can fail in buffer I/O just as easily
236*0Sstevel@tonic-gate * as here.
237*0Sstevel@tonic-gate *
238*0Sstevel@tonic-gate * XXX
239*0Sstevel@tonic-gate * We'd like to test to see if the file is too big to mmap. Since we
240*0Sstevel@tonic-gate * don't know what size or type off_t's or size_t's are, or the largest
241*0Sstevel@tonic-gate * unsigned integral type is, or what random insanity the local C
242*0Sstevel@tonic-gate * compiler will perpetrate, doing the comparison in a portable way is
243*0Sstevel@tonic-gate * flatly impossible. Hope that mmap fails if the file is too large.
244*0Sstevel@tonic-gate */
245*0Sstevel@tonic-gate #define DB_MAXMMAPSIZE (10 * 1024 * 1024) /* 10 Mb. */
246*0Sstevel@tonic-gate if (F_ISSET(mfp, MP_CAN_MMAP)) {
247*0Sstevel@tonic-gate if (!F_ISSET(dbmfp, MP_READONLY))
248*0Sstevel@tonic-gate F_CLR(mfp, MP_CAN_MMAP);
249*0Sstevel@tonic-gate if (path == NULL)
250*0Sstevel@tonic-gate F_CLR(mfp, MP_CAN_MMAP);
251*0Sstevel@tonic-gate if (finfop->ftype != 0)
252*0Sstevel@tonic-gate F_CLR(mfp, MP_CAN_MMAP);
253*0Sstevel@tonic-gate if (LF_ISSET(DB_NOMMAP))
254*0Sstevel@tonic-gate F_CLR(mfp, MP_CAN_MMAP);
255*0Sstevel@tonic-gate maxmap = dbenv == NULL || dbenv->mp_mmapsize == 0 ?
256*0Sstevel@tonic-gate DB_MAXMMAPSIZE : dbenv->mp_mmapsize;
257*0Sstevel@tonic-gate if (mbytes > maxmap / MEGABYTE ||
258*0Sstevel@tonic-gate (mbytes == maxmap / MEGABYTE && bytes >= maxmap % MEGABYTE))
259*0Sstevel@tonic-gate F_CLR(mfp, MP_CAN_MMAP);
260*0Sstevel@tonic-gate }
261*0Sstevel@tonic-gate dbmfp->addr = NULL;
262*0Sstevel@tonic-gate if (F_ISSET(mfp, MP_CAN_MMAP)) {
263*0Sstevel@tonic-gate dbmfp->len = (size_t)mbytes * MEGABYTE + bytes;
264*0Sstevel@tonic-gate if (__db_mapfile(rpath,
265*0Sstevel@tonic-gate dbmfp->fd, dbmfp->len, 1, &dbmfp->addr) != 0) {
266*0Sstevel@tonic-gate dbmfp->addr = NULL;
267*0Sstevel@tonic-gate F_CLR(mfp, MP_CAN_MMAP);
268*0Sstevel@tonic-gate }
269*0Sstevel@tonic-gate }
270*0Sstevel@tonic-gate if (rpath != NULL)
271*0Sstevel@tonic-gate __os_freestr(rpath);
272*0Sstevel@tonic-gate
273*0Sstevel@tonic-gate LOCKHANDLE(dbmp, dbmp->mutexp);
274*0Sstevel@tonic-gate TAILQ_INSERT_TAIL(&dbmp->dbmfq, dbmfp, q);
275*0Sstevel@tonic-gate UNLOCKHANDLE(dbmp, dbmp->mutexp);
276*0Sstevel@tonic-gate
277*0Sstevel@tonic-gate *retp = dbmfp;
278*0Sstevel@tonic-gate return (0);
279*0Sstevel@tonic-gate
280*0Sstevel@tonic-gate err: /*
281*0Sstevel@tonic-gate * Note that we do not have to free the thread mutex, because we
282*0Sstevel@tonic-gate * never get to here after we have successfully allocated it.
283*0Sstevel@tonic-gate */
284*0Sstevel@tonic-gate if (rpath != NULL)
285*0Sstevel@tonic-gate __os_freestr(rpath);
286*0Sstevel@tonic-gate if (dbmfp->fd != -1)
287*0Sstevel@tonic-gate (void)__os_close(dbmfp->fd);
288*0Sstevel@tonic-gate if (dbmfp != NULL)
289*0Sstevel@tonic-gate __os_free(dbmfp, sizeof(DB_MPOOLFILE));
290*0Sstevel@tonic-gate return (ret);
291*0Sstevel@tonic-gate }
292*0Sstevel@tonic-gate
293*0Sstevel@tonic-gate /*
294*0Sstevel@tonic-gate * __memp_mf_open --
295*0Sstevel@tonic-gate * Open an MPOOLFILE.
296*0Sstevel@tonic-gate */
297*0Sstevel@tonic-gate static int
__memp_mf_open(dbmp,path,pagesize,last_pgno,finfop,retp)298*0Sstevel@tonic-gate __memp_mf_open(dbmp, path, pagesize, last_pgno, finfop, retp)
299*0Sstevel@tonic-gate DB_MPOOL *dbmp;
300*0Sstevel@tonic-gate const char *path;
301*0Sstevel@tonic-gate size_t pagesize;
302*0Sstevel@tonic-gate db_pgno_t last_pgno;
303*0Sstevel@tonic-gate DB_MPOOL_FINFO *finfop;
304*0Sstevel@tonic-gate MPOOLFILE **retp;
305*0Sstevel@tonic-gate {
306*0Sstevel@tonic-gate MPOOLFILE *mfp;
307*0Sstevel@tonic-gate int ret;
308*0Sstevel@tonic-gate void *p;
309*0Sstevel@tonic-gate
310*0Sstevel@tonic-gate #define ISTEMPORARY (path == NULL)
311*0Sstevel@tonic-gate
312*0Sstevel@tonic-gate /*
313*0Sstevel@tonic-gate * Walk the list of MPOOLFILE's, looking for a matching file.
314*0Sstevel@tonic-gate * Temporary files can't match previous files.
315*0Sstevel@tonic-gate */
316*0Sstevel@tonic-gate if (!ISTEMPORARY)
317*0Sstevel@tonic-gate for (mfp = SH_TAILQ_FIRST(&dbmp->mp->mpfq, __mpoolfile);
318*0Sstevel@tonic-gate mfp != NULL; mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile)) {
319*0Sstevel@tonic-gate if (F_ISSET(mfp, MP_TEMP))
320*0Sstevel@tonic-gate continue;
321*0Sstevel@tonic-gate if (!memcmp(finfop->fileid,
322*0Sstevel@tonic-gate R_ADDR(dbmp, mfp->fileid_off), DB_FILE_ID_LEN)) {
323*0Sstevel@tonic-gate if (finfop->clear_len != mfp->clear_len ||
324*0Sstevel@tonic-gate finfop->ftype != mfp->ftype ||
325*0Sstevel@tonic-gate pagesize != mfp->stat.st_pagesize) {
326*0Sstevel@tonic-gate __db_err(dbmp->dbenv,
327*0Sstevel@tonic-gate "%s: ftype, clear length or pagesize changed",
328*0Sstevel@tonic-gate path);
329*0Sstevel@tonic-gate return (EINVAL);
330*0Sstevel@tonic-gate }
331*0Sstevel@tonic-gate
332*0Sstevel@tonic-gate /* Found it: increment the reference count. */
333*0Sstevel@tonic-gate ++mfp->ref;
334*0Sstevel@tonic-gate *retp = mfp;
335*0Sstevel@tonic-gate return (0);
336*0Sstevel@tonic-gate }
337*0Sstevel@tonic-gate }
338*0Sstevel@tonic-gate
339*0Sstevel@tonic-gate /* Allocate a new MPOOLFILE. */
340*0Sstevel@tonic-gate if ((ret = __memp_alloc(dbmp, sizeof(MPOOLFILE), NULL, &mfp)) != 0)
341*0Sstevel@tonic-gate return (ret);
342*0Sstevel@tonic-gate *retp = mfp;
343*0Sstevel@tonic-gate
344*0Sstevel@tonic-gate /* Initialize the structure. */
345*0Sstevel@tonic-gate memset(mfp, 0, sizeof(MPOOLFILE));
346*0Sstevel@tonic-gate mfp->ref = 1;
347*0Sstevel@tonic-gate mfp->ftype = finfop->ftype;
348*0Sstevel@tonic-gate mfp->lsn_off = finfop->lsn_offset;
349*0Sstevel@tonic-gate mfp->clear_len = finfop->clear_len;
350*0Sstevel@tonic-gate
351*0Sstevel@tonic-gate /*
352*0Sstevel@tonic-gate * If the user specifies DB_MPOOL_LAST or DB_MPOOL_NEW on a memp_fget,
353*0Sstevel@tonic-gate * we have to know the last page in the file. Figure it out and save
354*0Sstevel@tonic-gate * it away.
355*0Sstevel@tonic-gate */
356*0Sstevel@tonic-gate mfp->stat.st_pagesize = pagesize;
357*0Sstevel@tonic-gate mfp->orig_last_pgno = mfp->last_pgno = last_pgno;
358*0Sstevel@tonic-gate
359*0Sstevel@tonic-gate if (ISTEMPORARY)
360*0Sstevel@tonic-gate F_SET(mfp, MP_TEMP);
361*0Sstevel@tonic-gate else {
362*0Sstevel@tonic-gate /* Copy the file path into shared memory. */
363*0Sstevel@tonic-gate if ((ret = __memp_alloc(dbmp,
364*0Sstevel@tonic-gate strlen(path) + 1, &mfp->path_off, &p)) != 0)
365*0Sstevel@tonic-gate goto err;
366*0Sstevel@tonic-gate memcpy(p, path, strlen(path) + 1);
367*0Sstevel@tonic-gate
368*0Sstevel@tonic-gate /* Copy the file identification string into shared memory. */
369*0Sstevel@tonic-gate if ((ret = __memp_alloc(dbmp,
370*0Sstevel@tonic-gate DB_FILE_ID_LEN, &mfp->fileid_off, &p)) != 0)
371*0Sstevel@tonic-gate goto err;
372*0Sstevel@tonic-gate memcpy(p, finfop->fileid, DB_FILE_ID_LEN);
373*0Sstevel@tonic-gate
374*0Sstevel@tonic-gate F_SET(mfp, MP_CAN_MMAP);
375*0Sstevel@tonic-gate }
376*0Sstevel@tonic-gate
377*0Sstevel@tonic-gate /* Copy the page cookie into shared memory. */
378*0Sstevel@tonic-gate if (finfop->pgcookie == NULL || finfop->pgcookie->size == 0) {
379*0Sstevel@tonic-gate mfp->pgcookie_len = 0;
380*0Sstevel@tonic-gate mfp->pgcookie_off = 0;
381*0Sstevel@tonic-gate } else {
382*0Sstevel@tonic-gate if ((ret = __memp_alloc(dbmp,
383*0Sstevel@tonic-gate finfop->pgcookie->size, &mfp->pgcookie_off, &p)) != 0)
384*0Sstevel@tonic-gate goto err;
385*0Sstevel@tonic-gate memcpy(p, finfop->pgcookie->data, finfop->pgcookie->size);
386*0Sstevel@tonic-gate mfp->pgcookie_len = finfop->pgcookie->size;
387*0Sstevel@tonic-gate }
388*0Sstevel@tonic-gate
389*0Sstevel@tonic-gate /* Prepend the MPOOLFILE to the list of MPOOLFILE's. */
390*0Sstevel@tonic-gate SH_TAILQ_INSERT_HEAD(&dbmp->mp->mpfq, mfp, q, __mpoolfile);
391*0Sstevel@tonic-gate
392*0Sstevel@tonic-gate if (0) {
393*0Sstevel@tonic-gate err: if (mfp->path_off != 0)
394*0Sstevel@tonic-gate __db_shalloc_free(dbmp->addr,
395*0Sstevel@tonic-gate R_ADDR(dbmp, mfp->path_off));
396*0Sstevel@tonic-gate if (mfp->fileid_off != 0)
397*0Sstevel@tonic-gate __db_shalloc_free(dbmp->addr,
398*0Sstevel@tonic-gate R_ADDR(dbmp, mfp->fileid_off));
399*0Sstevel@tonic-gate if (mfp != NULL)
400*0Sstevel@tonic-gate __db_shalloc_free(dbmp->addr, mfp);
401*0Sstevel@tonic-gate mfp = NULL;
402*0Sstevel@tonic-gate }
403*0Sstevel@tonic-gate return (0);
404*0Sstevel@tonic-gate }
405*0Sstevel@tonic-gate
406*0Sstevel@tonic-gate /*
407*0Sstevel@tonic-gate * memp_fclose --
408*0Sstevel@tonic-gate * Close a backing file for the memory pool.
409*0Sstevel@tonic-gate */
410*0Sstevel@tonic-gate int
memp_fclose(dbmfp)411*0Sstevel@tonic-gate memp_fclose(dbmfp)
412*0Sstevel@tonic-gate DB_MPOOLFILE *dbmfp;
413*0Sstevel@tonic-gate {
414*0Sstevel@tonic-gate DB_MPOOL *dbmp;
415*0Sstevel@tonic-gate int ret, t_ret;
416*0Sstevel@tonic-gate
417*0Sstevel@tonic-gate dbmp = dbmfp->dbmp;
418*0Sstevel@tonic-gate ret = 0;
419*0Sstevel@tonic-gate
420*0Sstevel@tonic-gate MP_PANIC_CHECK(dbmp);
421*0Sstevel@tonic-gate
422*0Sstevel@tonic-gate for (;;) {
423*0Sstevel@tonic-gate LOCKHANDLE(dbmp, dbmp->mutexp);
424*0Sstevel@tonic-gate
425*0Sstevel@tonic-gate /*
426*0Sstevel@tonic-gate * We have to reference count DB_MPOOLFILE structures as other
427*0Sstevel@tonic-gate * threads may be using them. The problem only happens if the
428*0Sstevel@tonic-gate * application makes a bad design choice. Here's the path:
429*0Sstevel@tonic-gate *
430*0Sstevel@tonic-gate * Thread A opens a database.
431*0Sstevel@tonic-gate * Thread B uses thread A's DB_MPOOLFILE to write a buffer
432*0Sstevel@tonic-gate * in order to free up memory in the mpool cache.
433*0Sstevel@tonic-gate * Thread A closes the database while thread B is using the
434*0Sstevel@tonic-gate * DB_MPOOLFILE structure.
435*0Sstevel@tonic-gate *
436*0Sstevel@tonic-gate * By opening all databases before creating the threads, and
437*0Sstevel@tonic-gate * closing them after the threads have exited, applications
438*0Sstevel@tonic-gate * get better performance and avoid the problem path entirely.
439*0Sstevel@tonic-gate *
440*0Sstevel@tonic-gate * Regardless, holding the DB_MPOOLFILE to flush a dirty buffer
441*0Sstevel@tonic-gate * is a short-term lock, even in worst case, since we better be
442*0Sstevel@tonic-gate * the only thread of control using the DB_MPOOLFILE structure
443*0Sstevel@tonic-gate * to read pages *into* the cache. Wait until we're the only
444*0Sstevel@tonic-gate * reference holder and remove the DB_MPOOLFILE structure from
445*0Sstevel@tonic-gate * the list, so nobody else can even find it.
446*0Sstevel@tonic-gate */
447*0Sstevel@tonic-gate if (dbmfp->ref == 1) {
448*0Sstevel@tonic-gate TAILQ_REMOVE(&dbmp->dbmfq, dbmfp, q);
449*0Sstevel@tonic-gate break;
450*0Sstevel@tonic-gate }
451*0Sstevel@tonic-gate UNLOCKHANDLE(dbmp, dbmp->mutexp);
452*0Sstevel@tonic-gate
453*0Sstevel@tonic-gate (void)__os_sleep(1, 0);
454*0Sstevel@tonic-gate }
455*0Sstevel@tonic-gate UNLOCKHANDLE(dbmp, dbmp->mutexp);
456*0Sstevel@tonic-gate
457*0Sstevel@tonic-gate /* Complain if pinned blocks never returned. */
458*0Sstevel@tonic-gate if (dbmfp->pinref != 0)
459*0Sstevel@tonic-gate __db_err(dbmp->dbenv, "%s: close: %lu blocks left pinned",
460*0Sstevel@tonic-gate __memp_fn(dbmfp), (u_long)dbmfp->pinref);
461*0Sstevel@tonic-gate
462*0Sstevel@tonic-gate /* Close the underlying MPOOLFILE. */
463*0Sstevel@tonic-gate (void)__memp_mf_close(dbmp, dbmfp);
464*0Sstevel@tonic-gate
465*0Sstevel@tonic-gate /* Discard any mmap information. */
466*0Sstevel@tonic-gate if (dbmfp->addr != NULL &&
467*0Sstevel@tonic-gate (ret = __db_unmapfile(dbmfp->addr, dbmfp->len)) != 0)
468*0Sstevel@tonic-gate __db_err(dbmp->dbenv,
469*0Sstevel@tonic-gate "%s: %s", __memp_fn(dbmfp), strerror(ret));
470*0Sstevel@tonic-gate
471*0Sstevel@tonic-gate /* Close the file; temporary files may not yet have been created. */
472*0Sstevel@tonic-gate if (dbmfp->fd != -1 && (t_ret = __os_close(dbmfp->fd)) != 0) {
473*0Sstevel@tonic-gate __db_err(dbmp->dbenv,
474*0Sstevel@tonic-gate "%s: %s", __memp_fn(dbmfp), strerror(t_ret));
475*0Sstevel@tonic-gate if (ret != 0)
476*0Sstevel@tonic-gate t_ret = ret;
477*0Sstevel@tonic-gate }
478*0Sstevel@tonic-gate
479*0Sstevel@tonic-gate /* Free memory. */
480*0Sstevel@tonic-gate if (dbmfp->mutexp != NULL) {
481*0Sstevel@tonic-gate LOCKREGION(dbmp);
482*0Sstevel@tonic-gate __db_shalloc_free(dbmp->addr, dbmfp->mutexp);
483*0Sstevel@tonic-gate UNLOCKREGION(dbmp);
484*0Sstevel@tonic-gate }
485*0Sstevel@tonic-gate
486*0Sstevel@tonic-gate /* Discard the DB_MPOOLFILE structure. */
487*0Sstevel@tonic-gate __os_free(dbmfp, sizeof(DB_MPOOLFILE));
488*0Sstevel@tonic-gate
489*0Sstevel@tonic-gate return (ret);
490*0Sstevel@tonic-gate }
491*0Sstevel@tonic-gate
492*0Sstevel@tonic-gate /*
493*0Sstevel@tonic-gate * __memp_mf_close --
494*0Sstevel@tonic-gate * Close down an MPOOLFILE.
495*0Sstevel@tonic-gate */
496*0Sstevel@tonic-gate static int
__memp_mf_close(dbmp,dbmfp)497*0Sstevel@tonic-gate __memp_mf_close(dbmp, dbmfp)
498*0Sstevel@tonic-gate DB_MPOOL *dbmp;
499*0Sstevel@tonic-gate DB_MPOOLFILE *dbmfp;
500*0Sstevel@tonic-gate {
501*0Sstevel@tonic-gate BH *bhp, *nbhp;
502*0Sstevel@tonic-gate MPOOL *mp;
503*0Sstevel@tonic-gate MPOOLFILE *mfp;
504*0Sstevel@tonic-gate size_t mf_offset;
505*0Sstevel@tonic-gate
506*0Sstevel@tonic-gate mp = dbmp->mp;
507*0Sstevel@tonic-gate mfp = dbmfp->mfp;
508*0Sstevel@tonic-gate
509*0Sstevel@tonic-gate LOCKREGION(dbmp);
510*0Sstevel@tonic-gate
511*0Sstevel@tonic-gate /* If more than a single reference, simply decrement. */
512*0Sstevel@tonic-gate if (mfp->ref > 1) {
513*0Sstevel@tonic-gate --mfp->ref;
514*0Sstevel@tonic-gate goto ret1;
515*0Sstevel@tonic-gate }
516*0Sstevel@tonic-gate
517*0Sstevel@tonic-gate /*
518*0Sstevel@tonic-gate * Move any BH's held by the file to the free list. We don't free the
519*0Sstevel@tonic-gate * memory itself because we may be discarding the memory pool, and it's
520*0Sstevel@tonic-gate * fairly expensive to reintegrate the buffers back into the region for
521*0Sstevel@tonic-gate * no purpose.
522*0Sstevel@tonic-gate */
523*0Sstevel@tonic-gate mf_offset = R_OFFSET(dbmp, mfp);
524*0Sstevel@tonic-gate for (bhp = SH_TAILQ_FIRST(&mp->bhq, __bh); bhp != NULL; bhp = nbhp) {
525*0Sstevel@tonic-gate nbhp = SH_TAILQ_NEXT(bhp, q, __bh);
526*0Sstevel@tonic-gate
527*0Sstevel@tonic-gate #ifdef DEBUG_NO_DIRTY
528*0Sstevel@tonic-gate /* Complain if we find any blocks that were left dirty. */
529*0Sstevel@tonic-gate if (F_ISSET(bhp, BH_DIRTY))
530*0Sstevel@tonic-gate __db_err(dbmp->dbenv,
531*0Sstevel@tonic-gate "%s: close: pgno %lu left dirty; ref %lu",
532*0Sstevel@tonic-gate __memp_fn(dbmfp),
533*0Sstevel@tonic-gate (u_long)bhp->pgno, (u_long)bhp->ref);
534*0Sstevel@tonic-gate #endif
535*0Sstevel@tonic-gate
536*0Sstevel@tonic-gate if (bhp->mf_offset == mf_offset) {
537*0Sstevel@tonic-gate if (F_ISSET(bhp, BH_DIRTY)) {
538*0Sstevel@tonic-gate ++mp->stat.st_page_clean;
539*0Sstevel@tonic-gate --mp->stat.st_page_dirty;
540*0Sstevel@tonic-gate }
541*0Sstevel@tonic-gate __memp_bhfree(dbmp, mfp, bhp, 0);
542*0Sstevel@tonic-gate SH_TAILQ_INSERT_HEAD(&mp->bhfq, bhp, q, __bh);
543*0Sstevel@tonic-gate }
544*0Sstevel@tonic-gate }
545*0Sstevel@tonic-gate
546*0Sstevel@tonic-gate /* Delete from the list of MPOOLFILEs. */
547*0Sstevel@tonic-gate SH_TAILQ_REMOVE(&mp->mpfq, mfp, q, __mpoolfile);
548*0Sstevel@tonic-gate
549*0Sstevel@tonic-gate /* Free the space. */
550*0Sstevel@tonic-gate if (mfp->path_off != 0)
551*0Sstevel@tonic-gate __db_shalloc_free(dbmp->addr, R_ADDR(dbmp, mfp->path_off));
552*0Sstevel@tonic-gate if (mfp->fileid_off != 0)
553*0Sstevel@tonic-gate __db_shalloc_free(dbmp->addr, R_ADDR(dbmp, mfp->fileid_off));
554*0Sstevel@tonic-gate if (mfp->pgcookie_off != 0)
555*0Sstevel@tonic-gate __db_shalloc_free(dbmp->addr, R_ADDR(dbmp, mfp->pgcookie_off));
556*0Sstevel@tonic-gate __db_shalloc_free(dbmp->addr, mfp);
557*0Sstevel@tonic-gate
558*0Sstevel@tonic-gate ret1: UNLOCKREGION(dbmp);
559*0Sstevel@tonic-gate return (0);
560*0Sstevel@tonic-gate }
561