xref: /onnv-gate/usr/src/cmd/sendmail/db/mp/mp_fopen.c (revision 0:68f95e015346)
1*0Sstevel@tonic-gate /*-
2*0Sstevel@tonic-gate  * See the file LICENSE for redistribution information.
3*0Sstevel@tonic-gate  *
4*0Sstevel@tonic-gate  * Copyright (c) 1996, 1997, 1998
5*0Sstevel@tonic-gate  *	Sleepycat Software.  All rights reserved.
6*0Sstevel@tonic-gate  */
7*0Sstevel@tonic-gate #include "config.h"
8*0Sstevel@tonic-gate 
9*0Sstevel@tonic-gate #ifndef lint
10*0Sstevel@tonic-gate static const char sccsid[] = "@(#)mp_fopen.c	10.60 (Sleepycat) 1/1/99";
11*0Sstevel@tonic-gate #endif /* not lint */
12*0Sstevel@tonic-gate 
13*0Sstevel@tonic-gate #ifndef NO_SYSTEM_INCLUDES
14*0Sstevel@tonic-gate #include <sys/types.h>
15*0Sstevel@tonic-gate 
16*0Sstevel@tonic-gate #include <errno.h>
17*0Sstevel@tonic-gate #include <string.h>
18*0Sstevel@tonic-gate #endif
19*0Sstevel@tonic-gate 
20*0Sstevel@tonic-gate #include "db_int.h"
21*0Sstevel@tonic-gate #include "shqueue.h"
22*0Sstevel@tonic-gate #include "db_shash.h"
23*0Sstevel@tonic-gate #include "mp.h"
24*0Sstevel@tonic-gate #include "common_ext.h"
25*0Sstevel@tonic-gate 
26*0Sstevel@tonic-gate static int __memp_mf_close __P((DB_MPOOL *, DB_MPOOLFILE *));
27*0Sstevel@tonic-gate static int __memp_mf_open __P((DB_MPOOL *,
28*0Sstevel@tonic-gate     const char *, size_t, db_pgno_t, DB_MPOOL_FINFO *, MPOOLFILE **));
29*0Sstevel@tonic-gate 
30*0Sstevel@tonic-gate /*
31*0Sstevel@tonic-gate  * memp_fopen --
32*0Sstevel@tonic-gate  *	Open a backing file for the memory pool.
33*0Sstevel@tonic-gate  */
34*0Sstevel@tonic-gate int
memp_fopen(dbmp,path,flags,mode,pagesize,finfop,retp)35*0Sstevel@tonic-gate memp_fopen(dbmp, path, flags, mode, pagesize, finfop, retp)
36*0Sstevel@tonic-gate 	DB_MPOOL *dbmp;
37*0Sstevel@tonic-gate 	const char *path;
38*0Sstevel@tonic-gate 	u_int32_t flags;
39*0Sstevel@tonic-gate 	int mode;
40*0Sstevel@tonic-gate 	size_t pagesize;
41*0Sstevel@tonic-gate 	DB_MPOOL_FINFO *finfop;
42*0Sstevel@tonic-gate 	DB_MPOOLFILE **retp;
43*0Sstevel@tonic-gate {
44*0Sstevel@tonic-gate 	int ret;
45*0Sstevel@tonic-gate 
46*0Sstevel@tonic-gate 	MP_PANIC_CHECK(dbmp);
47*0Sstevel@tonic-gate 
48*0Sstevel@tonic-gate 	/* Validate arguments. */
49*0Sstevel@tonic-gate 	if ((ret = __db_fchk(dbmp->dbenv,
50*0Sstevel@tonic-gate 	    "memp_fopen", flags, DB_CREATE | DB_NOMMAP | DB_RDONLY)) != 0)
51*0Sstevel@tonic-gate 		return (ret);
52*0Sstevel@tonic-gate 
53*0Sstevel@tonic-gate 	/* Require a non-zero pagesize. */
54*0Sstevel@tonic-gate 	if (pagesize == 0) {
55*0Sstevel@tonic-gate 		__db_err(dbmp->dbenv, "memp_fopen: pagesize not specified");
56*0Sstevel@tonic-gate 		return (EINVAL);
57*0Sstevel@tonic-gate 	}
58*0Sstevel@tonic-gate 	if (finfop != NULL && finfop->clear_len > pagesize)
59*0Sstevel@tonic-gate 		return (EINVAL);
60*0Sstevel@tonic-gate 
61*0Sstevel@tonic-gate 	return (__memp_fopen(dbmp,
62*0Sstevel@tonic-gate 	    NULL, path, flags, mode, pagesize, 1, finfop, retp));
63*0Sstevel@tonic-gate }
64*0Sstevel@tonic-gate 
65*0Sstevel@tonic-gate /*
66*0Sstevel@tonic-gate  * __memp_fopen --
67*0Sstevel@tonic-gate  *	Open a backing file for the memory pool; internal version.
68*0Sstevel@tonic-gate  *
69*0Sstevel@tonic-gate  * PUBLIC: int __memp_fopen __P((DB_MPOOL *, MPOOLFILE *, const char *,
70*0Sstevel@tonic-gate  * PUBLIC:    u_int32_t, int, size_t, int, DB_MPOOL_FINFO *, DB_MPOOLFILE **));
71*0Sstevel@tonic-gate  */
72*0Sstevel@tonic-gate int
__memp_fopen(dbmp,mfp,path,flags,mode,pagesize,needlock,finfop,retp)73*0Sstevel@tonic-gate __memp_fopen(dbmp, mfp, path, flags, mode, pagesize, needlock, finfop, retp)
74*0Sstevel@tonic-gate 	DB_MPOOL *dbmp;
75*0Sstevel@tonic-gate 	MPOOLFILE *mfp;
76*0Sstevel@tonic-gate 	const char *path;
77*0Sstevel@tonic-gate 	u_int32_t flags;
78*0Sstevel@tonic-gate 	int mode, needlock;
79*0Sstevel@tonic-gate 	size_t pagesize;
80*0Sstevel@tonic-gate 	DB_MPOOL_FINFO *finfop;
81*0Sstevel@tonic-gate 	DB_MPOOLFILE **retp;
82*0Sstevel@tonic-gate {
83*0Sstevel@tonic-gate 	DB_ENV *dbenv;
84*0Sstevel@tonic-gate 	DB_MPOOLFILE *dbmfp;
85*0Sstevel@tonic-gate 	DB_MPOOL_FINFO finfo;
86*0Sstevel@tonic-gate 	db_pgno_t last_pgno;
87*0Sstevel@tonic-gate 	size_t maxmap;
88*0Sstevel@tonic-gate 	u_int32_t mbytes, bytes;
89*0Sstevel@tonic-gate 	int ret;
90*0Sstevel@tonic-gate 	u_int8_t idbuf[DB_FILE_ID_LEN];
91*0Sstevel@tonic-gate 	char *rpath;
92*0Sstevel@tonic-gate 
93*0Sstevel@tonic-gate 	dbenv = dbmp->dbenv;
94*0Sstevel@tonic-gate 	ret = 0;
95*0Sstevel@tonic-gate 	rpath = NULL;
96*0Sstevel@tonic-gate 
97*0Sstevel@tonic-gate 	/*
98*0Sstevel@tonic-gate 	 * If mfp is provided, we take the DB_MPOOL_FINFO information from
99*0Sstevel@tonic-gate 	 * the mfp.  We don't bother initializing everything, because some
100*0Sstevel@tonic-gate 	 * of them are expensive to acquire.  If no mfp is provided and the
101*0Sstevel@tonic-gate 	 * finfop argument is NULL, we default the values.
102*0Sstevel@tonic-gate 	 */
103*0Sstevel@tonic-gate 	if (finfop == NULL) {
104*0Sstevel@tonic-gate 		memset(&finfo, 0, sizeof(finfo));
105*0Sstevel@tonic-gate 		if (mfp != NULL) {
106*0Sstevel@tonic-gate 			finfo.ftype = mfp->ftype;
107*0Sstevel@tonic-gate 			finfo.pgcookie = NULL;
108*0Sstevel@tonic-gate 			finfo.fileid = NULL;
109*0Sstevel@tonic-gate 			finfo.lsn_offset = mfp->lsn_off;
110*0Sstevel@tonic-gate 			finfo.clear_len = mfp->clear_len;
111*0Sstevel@tonic-gate 		} else {
112*0Sstevel@tonic-gate 			finfo.ftype = 0;
113*0Sstevel@tonic-gate 			finfo.pgcookie = NULL;
114*0Sstevel@tonic-gate 			finfo.fileid = NULL;
115*0Sstevel@tonic-gate 			finfo.lsn_offset = -1;
116*0Sstevel@tonic-gate 			finfo.clear_len = 0;
117*0Sstevel@tonic-gate 		}
118*0Sstevel@tonic-gate 		finfop = &finfo;
119*0Sstevel@tonic-gate 	}
120*0Sstevel@tonic-gate 
121*0Sstevel@tonic-gate 	/* Allocate and initialize the per-process structure. */
122*0Sstevel@tonic-gate 	if ((ret = __os_calloc(1, sizeof(DB_MPOOLFILE), &dbmfp)) != 0)
123*0Sstevel@tonic-gate 		return (ret);
124*0Sstevel@tonic-gate 	dbmfp->dbmp = dbmp;
125*0Sstevel@tonic-gate 	dbmfp->fd = -1;
126*0Sstevel@tonic-gate 	dbmfp->ref = 1;
127*0Sstevel@tonic-gate 	if (LF_ISSET(DB_RDONLY))
128*0Sstevel@tonic-gate 		F_SET(dbmfp, MP_READONLY);
129*0Sstevel@tonic-gate 
130*0Sstevel@tonic-gate 	if (path == NULL) {
131*0Sstevel@tonic-gate 		if (LF_ISSET(DB_RDONLY)) {
132*0Sstevel@tonic-gate 			__db_err(dbenv,
133*0Sstevel@tonic-gate 			    "memp_fopen: temporary files can't be readonly");
134*0Sstevel@tonic-gate 			ret = EINVAL;
135*0Sstevel@tonic-gate 			goto err;
136*0Sstevel@tonic-gate 		}
137*0Sstevel@tonic-gate 		last_pgno = 0;
138*0Sstevel@tonic-gate 	} else {
139*0Sstevel@tonic-gate 		/* Get the real name for this file and open it. */
140*0Sstevel@tonic-gate 		if ((ret = __db_appname(dbenv,
141*0Sstevel@tonic-gate 		    DB_APP_DATA, NULL, path, 0, NULL, &rpath)) != 0)
142*0Sstevel@tonic-gate 			goto err;
143*0Sstevel@tonic-gate 		if ((ret = __db_open(rpath,
144*0Sstevel@tonic-gate 		   LF_ISSET(DB_CREATE | DB_RDONLY),
145*0Sstevel@tonic-gate 		   DB_CREATE | DB_RDONLY, mode, &dbmfp->fd)) != 0) {
146*0Sstevel@tonic-gate 			__db_err(dbenv, "%s: %s", rpath, strerror(ret));
147*0Sstevel@tonic-gate 			goto err;
148*0Sstevel@tonic-gate 		}
149*0Sstevel@tonic-gate 
150*0Sstevel@tonic-gate 		/*
151*0Sstevel@tonic-gate 		 * Don't permit files that aren't a multiple of the pagesize,
152*0Sstevel@tonic-gate 		 * and find the number of the last page in the file, all the
153*0Sstevel@tonic-gate 		 * time being careful not to overflow 32 bits.
154*0Sstevel@tonic-gate 		 *
155*0Sstevel@tonic-gate 		 * !!!
156*0Sstevel@tonic-gate 		 * We can't use off_t's here, or in any code in the mainline
157*0Sstevel@tonic-gate 		 * library for that matter.  (We have to use them in the os
158*0Sstevel@tonic-gate 		 * stubs, of course, as there are system calls that take them
159*0Sstevel@tonic-gate 		 * as arguments.)  The reason is that some customers build in
160*0Sstevel@tonic-gate 		 * environments where an off_t is 32-bits, but still run where
161*0Sstevel@tonic-gate 		 * offsets are 64-bits, and they pay us a lot of money.
162*0Sstevel@tonic-gate 		 */
163*0Sstevel@tonic-gate 		if ((ret = __os_ioinfo(rpath,
164*0Sstevel@tonic-gate 		    dbmfp->fd, &mbytes, &bytes, NULL)) != 0) {
165*0Sstevel@tonic-gate 			__db_err(dbenv, "%s: %s", rpath, strerror(ret));
166*0Sstevel@tonic-gate 			goto err;
167*0Sstevel@tonic-gate 		}
168*0Sstevel@tonic-gate 
169*0Sstevel@tonic-gate 		/* Page sizes have to be a power-of-two, ignore mbytes. */
170*0Sstevel@tonic-gate 		if (bytes % pagesize != 0) {
171*0Sstevel@tonic-gate 			__db_err(dbenv,
172*0Sstevel@tonic-gate 			    "%s: file size not a multiple of the pagesize",
173*0Sstevel@tonic-gate 			    rpath);
174*0Sstevel@tonic-gate 			ret = EINVAL;
175*0Sstevel@tonic-gate 			goto err;
176*0Sstevel@tonic-gate 		}
177*0Sstevel@tonic-gate 
178*0Sstevel@tonic-gate 		last_pgno = mbytes * (MEGABYTE / pagesize);
179*0Sstevel@tonic-gate 		last_pgno += bytes / pagesize;
180*0Sstevel@tonic-gate 
181*0Sstevel@tonic-gate 		/* Correction: page numbers are zero-based, not 1-based. */
182*0Sstevel@tonic-gate 		if (last_pgno != 0)
183*0Sstevel@tonic-gate 			--last_pgno;
184*0Sstevel@tonic-gate 
185*0Sstevel@tonic-gate 		/*
186*0Sstevel@tonic-gate 		 * Get the file id if we weren't given one.  Generated file id's
187*0Sstevel@tonic-gate 		 * don't use timestamps, otherwise there'd be no chance of any
188*0Sstevel@tonic-gate 		 * other process joining the party.
189*0Sstevel@tonic-gate 		 */
190*0Sstevel@tonic-gate 		if (finfop->fileid == NULL) {
191*0Sstevel@tonic-gate 			if ((ret = __os_fileid(dbenv, rpath, 0, idbuf)) != 0)
192*0Sstevel@tonic-gate 				goto err;
193*0Sstevel@tonic-gate 			finfop->fileid = idbuf;
194*0Sstevel@tonic-gate 		}
195*0Sstevel@tonic-gate 	}
196*0Sstevel@tonic-gate 
197*0Sstevel@tonic-gate 	/*
198*0Sstevel@tonic-gate 	 * If we weren't provided an underlying shared object to join with,
199*0Sstevel@tonic-gate 	 * find/allocate the shared file objects.  Also allocate space for
200*0Sstevel@tonic-gate 	 * for the per-process thread lock.
201*0Sstevel@tonic-gate 	 */
202*0Sstevel@tonic-gate 	if (needlock)
203*0Sstevel@tonic-gate 		LOCKREGION(dbmp);
204*0Sstevel@tonic-gate 
205*0Sstevel@tonic-gate 	if (mfp == NULL)
206*0Sstevel@tonic-gate 		ret = __memp_mf_open(dbmp,
207*0Sstevel@tonic-gate 		    path, pagesize, last_pgno, finfop, &mfp);
208*0Sstevel@tonic-gate 	else {
209*0Sstevel@tonic-gate 		++mfp->ref;
210*0Sstevel@tonic-gate 		ret = 0;
211*0Sstevel@tonic-gate 	}
212*0Sstevel@tonic-gate 	if (ret == 0 &&
213*0Sstevel@tonic-gate 	    F_ISSET(dbmp, MP_LOCKHANDLE) && (ret =
214*0Sstevel@tonic-gate 	    __memp_alloc(dbmp, sizeof(db_mutex_t), NULL, &dbmfp->mutexp)) == 0)
215*0Sstevel@tonic-gate 		LOCKINIT(dbmp, dbmfp->mutexp);
216*0Sstevel@tonic-gate 
217*0Sstevel@tonic-gate 	if (needlock)
218*0Sstevel@tonic-gate 		UNLOCKREGION(dbmp);
219*0Sstevel@tonic-gate 	if (ret != 0)
220*0Sstevel@tonic-gate 		goto err;
221*0Sstevel@tonic-gate 
222*0Sstevel@tonic-gate 	dbmfp->mfp = mfp;
223*0Sstevel@tonic-gate 
224*0Sstevel@tonic-gate 	/*
225*0Sstevel@tonic-gate 	 * If a file:
226*0Sstevel@tonic-gate 	 *	+ is read-only
227*0Sstevel@tonic-gate 	 *	+ isn't temporary
228*0Sstevel@tonic-gate 	 *	+ doesn't require any pgin/pgout support
229*0Sstevel@tonic-gate 	 *	+ the DB_NOMMAP flag wasn't set
230*0Sstevel@tonic-gate 	 *	+ and is less than mp_mmapsize bytes in size
231*0Sstevel@tonic-gate 	 *
232*0Sstevel@tonic-gate 	 * we can mmap it instead of reading/writing buffers.  Don't do error
233*0Sstevel@tonic-gate 	 * checking based on the mmap call failure.  We want to do normal I/O
234*0Sstevel@tonic-gate 	 * on the file if the reason we failed was because the file was on an
235*0Sstevel@tonic-gate 	 * NFS mounted partition, and we can fail in buffer I/O just as easily
236*0Sstevel@tonic-gate 	 * as here.
237*0Sstevel@tonic-gate 	 *
238*0Sstevel@tonic-gate 	 * XXX
239*0Sstevel@tonic-gate 	 * We'd like to test to see if the file is too big to mmap.  Since we
240*0Sstevel@tonic-gate 	 * don't know what size or type off_t's or size_t's are, or the largest
241*0Sstevel@tonic-gate 	 * unsigned integral type is, or what random insanity the local C
242*0Sstevel@tonic-gate 	 * compiler will perpetrate, doing the comparison in a portable way is
243*0Sstevel@tonic-gate 	 * flatly impossible.  Hope that mmap fails if the file is too large.
244*0Sstevel@tonic-gate 	 */
245*0Sstevel@tonic-gate #define	DB_MAXMMAPSIZE	(10 * 1024 * 1024)	/* 10 Mb. */
246*0Sstevel@tonic-gate 	if (F_ISSET(mfp, MP_CAN_MMAP)) {
247*0Sstevel@tonic-gate 		if (!F_ISSET(dbmfp, MP_READONLY))
248*0Sstevel@tonic-gate 			F_CLR(mfp, MP_CAN_MMAP);
249*0Sstevel@tonic-gate 		if (path == NULL)
250*0Sstevel@tonic-gate 			F_CLR(mfp, MP_CAN_MMAP);
251*0Sstevel@tonic-gate 		if (finfop->ftype != 0)
252*0Sstevel@tonic-gate 			F_CLR(mfp, MP_CAN_MMAP);
253*0Sstevel@tonic-gate 		if (LF_ISSET(DB_NOMMAP))
254*0Sstevel@tonic-gate 			F_CLR(mfp, MP_CAN_MMAP);
255*0Sstevel@tonic-gate 		maxmap = dbenv == NULL || dbenv->mp_mmapsize == 0 ?
256*0Sstevel@tonic-gate 		    DB_MAXMMAPSIZE : dbenv->mp_mmapsize;
257*0Sstevel@tonic-gate 		if (mbytes > maxmap / MEGABYTE ||
258*0Sstevel@tonic-gate 		    (mbytes == maxmap / MEGABYTE && bytes >= maxmap % MEGABYTE))
259*0Sstevel@tonic-gate 			F_CLR(mfp, MP_CAN_MMAP);
260*0Sstevel@tonic-gate 	}
261*0Sstevel@tonic-gate 	dbmfp->addr = NULL;
262*0Sstevel@tonic-gate 	if (F_ISSET(mfp, MP_CAN_MMAP)) {
263*0Sstevel@tonic-gate 		dbmfp->len = (size_t)mbytes * MEGABYTE + bytes;
264*0Sstevel@tonic-gate 		if (__db_mapfile(rpath,
265*0Sstevel@tonic-gate 		    dbmfp->fd, dbmfp->len, 1, &dbmfp->addr) != 0) {
266*0Sstevel@tonic-gate 			dbmfp->addr = NULL;
267*0Sstevel@tonic-gate 			F_CLR(mfp, MP_CAN_MMAP);
268*0Sstevel@tonic-gate 		}
269*0Sstevel@tonic-gate 	}
270*0Sstevel@tonic-gate 	if (rpath != NULL)
271*0Sstevel@tonic-gate 		__os_freestr(rpath);
272*0Sstevel@tonic-gate 
273*0Sstevel@tonic-gate 	LOCKHANDLE(dbmp, dbmp->mutexp);
274*0Sstevel@tonic-gate 	TAILQ_INSERT_TAIL(&dbmp->dbmfq, dbmfp, q);
275*0Sstevel@tonic-gate 	UNLOCKHANDLE(dbmp, dbmp->mutexp);
276*0Sstevel@tonic-gate 
277*0Sstevel@tonic-gate 	*retp = dbmfp;
278*0Sstevel@tonic-gate 	return (0);
279*0Sstevel@tonic-gate 
280*0Sstevel@tonic-gate err:	/*
281*0Sstevel@tonic-gate 	 * Note that we do not have to free the thread mutex, because we
282*0Sstevel@tonic-gate 	 * never get to here after we have successfully allocated it.
283*0Sstevel@tonic-gate 	 */
284*0Sstevel@tonic-gate 	if (rpath != NULL)
285*0Sstevel@tonic-gate 		__os_freestr(rpath);
286*0Sstevel@tonic-gate 	if (dbmfp->fd != -1)
287*0Sstevel@tonic-gate 		(void)__os_close(dbmfp->fd);
288*0Sstevel@tonic-gate 	if (dbmfp != NULL)
289*0Sstevel@tonic-gate 		__os_free(dbmfp, sizeof(DB_MPOOLFILE));
290*0Sstevel@tonic-gate 	return (ret);
291*0Sstevel@tonic-gate }
292*0Sstevel@tonic-gate 
293*0Sstevel@tonic-gate /*
294*0Sstevel@tonic-gate  * __memp_mf_open --
295*0Sstevel@tonic-gate  *	Open an MPOOLFILE.
296*0Sstevel@tonic-gate  */
297*0Sstevel@tonic-gate static int
__memp_mf_open(dbmp,path,pagesize,last_pgno,finfop,retp)298*0Sstevel@tonic-gate __memp_mf_open(dbmp, path, pagesize, last_pgno, finfop, retp)
299*0Sstevel@tonic-gate 	DB_MPOOL *dbmp;
300*0Sstevel@tonic-gate 	const char *path;
301*0Sstevel@tonic-gate 	size_t pagesize;
302*0Sstevel@tonic-gate 	db_pgno_t last_pgno;
303*0Sstevel@tonic-gate 	DB_MPOOL_FINFO *finfop;
304*0Sstevel@tonic-gate 	MPOOLFILE **retp;
305*0Sstevel@tonic-gate {
306*0Sstevel@tonic-gate 	MPOOLFILE *mfp;
307*0Sstevel@tonic-gate 	int ret;
308*0Sstevel@tonic-gate 	void *p;
309*0Sstevel@tonic-gate 
310*0Sstevel@tonic-gate #define	ISTEMPORARY	(path == NULL)
311*0Sstevel@tonic-gate 
312*0Sstevel@tonic-gate 	/*
313*0Sstevel@tonic-gate 	 * Walk the list of MPOOLFILE's, looking for a matching file.
314*0Sstevel@tonic-gate 	 * Temporary files can't match previous files.
315*0Sstevel@tonic-gate 	 */
316*0Sstevel@tonic-gate 	if (!ISTEMPORARY)
317*0Sstevel@tonic-gate 		for (mfp = SH_TAILQ_FIRST(&dbmp->mp->mpfq, __mpoolfile);
318*0Sstevel@tonic-gate 		    mfp != NULL; mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile)) {
319*0Sstevel@tonic-gate 			if (F_ISSET(mfp, MP_TEMP))
320*0Sstevel@tonic-gate 				continue;
321*0Sstevel@tonic-gate 			if (!memcmp(finfop->fileid,
322*0Sstevel@tonic-gate 			    R_ADDR(dbmp, mfp->fileid_off), DB_FILE_ID_LEN)) {
323*0Sstevel@tonic-gate 				if (finfop->clear_len != mfp->clear_len ||
324*0Sstevel@tonic-gate 				    finfop->ftype != mfp->ftype ||
325*0Sstevel@tonic-gate 				    pagesize != mfp->stat.st_pagesize) {
326*0Sstevel@tonic-gate 					__db_err(dbmp->dbenv,
327*0Sstevel@tonic-gate 			    "%s: ftype, clear length or pagesize changed",
328*0Sstevel@tonic-gate 					    path);
329*0Sstevel@tonic-gate 					return (EINVAL);
330*0Sstevel@tonic-gate 				}
331*0Sstevel@tonic-gate 
332*0Sstevel@tonic-gate 				/* Found it: increment the reference count. */
333*0Sstevel@tonic-gate 				++mfp->ref;
334*0Sstevel@tonic-gate 				*retp = mfp;
335*0Sstevel@tonic-gate 				return (0);
336*0Sstevel@tonic-gate 			}
337*0Sstevel@tonic-gate 		}
338*0Sstevel@tonic-gate 
339*0Sstevel@tonic-gate 	/* Allocate a new MPOOLFILE. */
340*0Sstevel@tonic-gate 	if ((ret = __memp_alloc(dbmp, sizeof(MPOOLFILE), NULL, &mfp)) != 0)
341*0Sstevel@tonic-gate 		return (ret);
342*0Sstevel@tonic-gate 	*retp = mfp;
343*0Sstevel@tonic-gate 
344*0Sstevel@tonic-gate 	/* Initialize the structure. */
345*0Sstevel@tonic-gate 	memset(mfp, 0, sizeof(MPOOLFILE));
346*0Sstevel@tonic-gate 	mfp->ref = 1;
347*0Sstevel@tonic-gate 	mfp->ftype = finfop->ftype;
348*0Sstevel@tonic-gate 	mfp->lsn_off = finfop->lsn_offset;
349*0Sstevel@tonic-gate 	mfp->clear_len = finfop->clear_len;
350*0Sstevel@tonic-gate 
351*0Sstevel@tonic-gate 	/*
352*0Sstevel@tonic-gate 	 * If the user specifies DB_MPOOL_LAST or DB_MPOOL_NEW on a memp_fget,
353*0Sstevel@tonic-gate 	 * we have to know the last page in the file.  Figure it out and save
354*0Sstevel@tonic-gate 	 * it away.
355*0Sstevel@tonic-gate 	 */
356*0Sstevel@tonic-gate 	mfp->stat.st_pagesize = pagesize;
357*0Sstevel@tonic-gate 	mfp->orig_last_pgno = mfp->last_pgno = last_pgno;
358*0Sstevel@tonic-gate 
359*0Sstevel@tonic-gate 	if (ISTEMPORARY)
360*0Sstevel@tonic-gate 		F_SET(mfp, MP_TEMP);
361*0Sstevel@tonic-gate 	else {
362*0Sstevel@tonic-gate 		/* Copy the file path into shared memory. */
363*0Sstevel@tonic-gate 		if ((ret = __memp_alloc(dbmp,
364*0Sstevel@tonic-gate 		    strlen(path) + 1, &mfp->path_off, &p)) != 0)
365*0Sstevel@tonic-gate 			goto err;
366*0Sstevel@tonic-gate 		memcpy(p, path, strlen(path) + 1);
367*0Sstevel@tonic-gate 
368*0Sstevel@tonic-gate 		/* Copy the file identification string into shared memory. */
369*0Sstevel@tonic-gate 		if ((ret = __memp_alloc(dbmp,
370*0Sstevel@tonic-gate 		    DB_FILE_ID_LEN, &mfp->fileid_off, &p)) != 0)
371*0Sstevel@tonic-gate 			goto err;
372*0Sstevel@tonic-gate 		memcpy(p, finfop->fileid, DB_FILE_ID_LEN);
373*0Sstevel@tonic-gate 
374*0Sstevel@tonic-gate 		F_SET(mfp, MP_CAN_MMAP);
375*0Sstevel@tonic-gate 	}
376*0Sstevel@tonic-gate 
377*0Sstevel@tonic-gate 	/* Copy the page cookie into shared memory. */
378*0Sstevel@tonic-gate 	if (finfop->pgcookie == NULL || finfop->pgcookie->size == 0) {
379*0Sstevel@tonic-gate 		mfp->pgcookie_len = 0;
380*0Sstevel@tonic-gate 		mfp->pgcookie_off = 0;
381*0Sstevel@tonic-gate 	} else {
382*0Sstevel@tonic-gate 		if ((ret = __memp_alloc(dbmp,
383*0Sstevel@tonic-gate 		    finfop->pgcookie->size, &mfp->pgcookie_off, &p)) != 0)
384*0Sstevel@tonic-gate 			goto err;
385*0Sstevel@tonic-gate 		memcpy(p, finfop->pgcookie->data, finfop->pgcookie->size);
386*0Sstevel@tonic-gate 		mfp->pgcookie_len = finfop->pgcookie->size;
387*0Sstevel@tonic-gate 	}
388*0Sstevel@tonic-gate 
389*0Sstevel@tonic-gate 	/* Prepend the MPOOLFILE to the list of MPOOLFILE's. */
390*0Sstevel@tonic-gate 	SH_TAILQ_INSERT_HEAD(&dbmp->mp->mpfq, mfp, q, __mpoolfile);
391*0Sstevel@tonic-gate 
392*0Sstevel@tonic-gate 	if (0) {
393*0Sstevel@tonic-gate err:		if (mfp->path_off != 0)
394*0Sstevel@tonic-gate 			__db_shalloc_free(dbmp->addr,
395*0Sstevel@tonic-gate 			    R_ADDR(dbmp, mfp->path_off));
396*0Sstevel@tonic-gate 		if (mfp->fileid_off != 0)
397*0Sstevel@tonic-gate 			__db_shalloc_free(dbmp->addr,
398*0Sstevel@tonic-gate 			    R_ADDR(dbmp, mfp->fileid_off));
399*0Sstevel@tonic-gate 		if (mfp != NULL)
400*0Sstevel@tonic-gate 			__db_shalloc_free(dbmp->addr, mfp);
401*0Sstevel@tonic-gate 		mfp = NULL;
402*0Sstevel@tonic-gate 	}
403*0Sstevel@tonic-gate 	return (0);
404*0Sstevel@tonic-gate }
405*0Sstevel@tonic-gate 
406*0Sstevel@tonic-gate /*
407*0Sstevel@tonic-gate  * memp_fclose --
408*0Sstevel@tonic-gate  *	Close a backing file for the memory pool.
409*0Sstevel@tonic-gate  */
410*0Sstevel@tonic-gate int
memp_fclose(dbmfp)411*0Sstevel@tonic-gate memp_fclose(dbmfp)
412*0Sstevel@tonic-gate 	DB_MPOOLFILE *dbmfp;
413*0Sstevel@tonic-gate {
414*0Sstevel@tonic-gate 	DB_MPOOL *dbmp;
415*0Sstevel@tonic-gate 	int ret, t_ret;
416*0Sstevel@tonic-gate 
417*0Sstevel@tonic-gate 	dbmp = dbmfp->dbmp;
418*0Sstevel@tonic-gate 	ret = 0;
419*0Sstevel@tonic-gate 
420*0Sstevel@tonic-gate 	MP_PANIC_CHECK(dbmp);
421*0Sstevel@tonic-gate 
422*0Sstevel@tonic-gate 	for (;;) {
423*0Sstevel@tonic-gate 		LOCKHANDLE(dbmp, dbmp->mutexp);
424*0Sstevel@tonic-gate 
425*0Sstevel@tonic-gate 		/*
426*0Sstevel@tonic-gate 		 * We have to reference count DB_MPOOLFILE structures as other
427*0Sstevel@tonic-gate 		 * threads may be using them.  The problem only happens if the
428*0Sstevel@tonic-gate 		 * application makes a bad design choice.  Here's the path:
429*0Sstevel@tonic-gate 		 *
430*0Sstevel@tonic-gate 		 * Thread A opens a database.
431*0Sstevel@tonic-gate 		 * Thread B uses thread A's DB_MPOOLFILE to write a buffer
432*0Sstevel@tonic-gate 		 *    in order to free up memory in the mpool cache.
433*0Sstevel@tonic-gate 		 * Thread A closes the database while thread B is using the
434*0Sstevel@tonic-gate 		 *    DB_MPOOLFILE structure.
435*0Sstevel@tonic-gate 		 *
436*0Sstevel@tonic-gate 		 * By opening all databases before creating the threads, and
437*0Sstevel@tonic-gate 		 * closing them after the threads have exited, applications
438*0Sstevel@tonic-gate 		 * get better performance and avoid the problem path entirely.
439*0Sstevel@tonic-gate 		 *
440*0Sstevel@tonic-gate 		 * Regardless, holding the DB_MPOOLFILE to flush a dirty buffer
441*0Sstevel@tonic-gate 		 * is a short-term lock, even in worst case, since we better be
442*0Sstevel@tonic-gate 		 * the only thread of control using the DB_MPOOLFILE structure
443*0Sstevel@tonic-gate 		 * to read pages *into* the cache.  Wait until we're the only
444*0Sstevel@tonic-gate 		 * reference holder and remove the DB_MPOOLFILE structure from
445*0Sstevel@tonic-gate 		 * the list, so nobody else can even find it.
446*0Sstevel@tonic-gate 		 */
447*0Sstevel@tonic-gate 		if (dbmfp->ref == 1) {
448*0Sstevel@tonic-gate 			TAILQ_REMOVE(&dbmp->dbmfq, dbmfp, q);
449*0Sstevel@tonic-gate 			break;
450*0Sstevel@tonic-gate 		}
451*0Sstevel@tonic-gate 		UNLOCKHANDLE(dbmp, dbmp->mutexp);
452*0Sstevel@tonic-gate 
453*0Sstevel@tonic-gate 		(void)__os_sleep(1, 0);
454*0Sstevel@tonic-gate 	}
455*0Sstevel@tonic-gate 	UNLOCKHANDLE(dbmp, dbmp->mutexp);
456*0Sstevel@tonic-gate 
457*0Sstevel@tonic-gate 	/* Complain if pinned blocks never returned. */
458*0Sstevel@tonic-gate 	if (dbmfp->pinref != 0)
459*0Sstevel@tonic-gate 		__db_err(dbmp->dbenv, "%s: close: %lu blocks left pinned",
460*0Sstevel@tonic-gate 		    __memp_fn(dbmfp), (u_long)dbmfp->pinref);
461*0Sstevel@tonic-gate 
462*0Sstevel@tonic-gate 	/* Close the underlying MPOOLFILE. */
463*0Sstevel@tonic-gate 	(void)__memp_mf_close(dbmp, dbmfp);
464*0Sstevel@tonic-gate 
465*0Sstevel@tonic-gate 	/* Discard any mmap information. */
466*0Sstevel@tonic-gate 	if (dbmfp->addr != NULL &&
467*0Sstevel@tonic-gate 	    (ret = __db_unmapfile(dbmfp->addr, dbmfp->len)) != 0)
468*0Sstevel@tonic-gate 		__db_err(dbmp->dbenv,
469*0Sstevel@tonic-gate 		    "%s: %s", __memp_fn(dbmfp), strerror(ret));
470*0Sstevel@tonic-gate 
471*0Sstevel@tonic-gate 	/* Close the file; temporary files may not yet have been created. */
472*0Sstevel@tonic-gate 	if (dbmfp->fd != -1 && (t_ret = __os_close(dbmfp->fd)) != 0) {
473*0Sstevel@tonic-gate 		__db_err(dbmp->dbenv,
474*0Sstevel@tonic-gate 		    "%s: %s", __memp_fn(dbmfp), strerror(t_ret));
475*0Sstevel@tonic-gate 		if (ret != 0)
476*0Sstevel@tonic-gate 			t_ret = ret;
477*0Sstevel@tonic-gate 	}
478*0Sstevel@tonic-gate 
479*0Sstevel@tonic-gate 	/* Free memory. */
480*0Sstevel@tonic-gate 	if (dbmfp->mutexp != NULL) {
481*0Sstevel@tonic-gate 		LOCKREGION(dbmp);
482*0Sstevel@tonic-gate 		__db_shalloc_free(dbmp->addr, dbmfp->mutexp);
483*0Sstevel@tonic-gate 		UNLOCKREGION(dbmp);
484*0Sstevel@tonic-gate 	}
485*0Sstevel@tonic-gate 
486*0Sstevel@tonic-gate 	/* Discard the DB_MPOOLFILE structure. */
487*0Sstevel@tonic-gate 	__os_free(dbmfp, sizeof(DB_MPOOLFILE));
488*0Sstevel@tonic-gate 
489*0Sstevel@tonic-gate 	return (ret);
490*0Sstevel@tonic-gate }
491*0Sstevel@tonic-gate 
492*0Sstevel@tonic-gate /*
493*0Sstevel@tonic-gate  * __memp_mf_close --
494*0Sstevel@tonic-gate  *	Close down an MPOOLFILE.
495*0Sstevel@tonic-gate  */
496*0Sstevel@tonic-gate static int
__memp_mf_close(dbmp,dbmfp)497*0Sstevel@tonic-gate __memp_mf_close(dbmp, dbmfp)
498*0Sstevel@tonic-gate 	DB_MPOOL *dbmp;
499*0Sstevel@tonic-gate 	DB_MPOOLFILE *dbmfp;
500*0Sstevel@tonic-gate {
501*0Sstevel@tonic-gate 	BH *bhp, *nbhp;
502*0Sstevel@tonic-gate 	MPOOL *mp;
503*0Sstevel@tonic-gate 	MPOOLFILE *mfp;
504*0Sstevel@tonic-gate 	size_t mf_offset;
505*0Sstevel@tonic-gate 
506*0Sstevel@tonic-gate 	mp = dbmp->mp;
507*0Sstevel@tonic-gate 	mfp = dbmfp->mfp;
508*0Sstevel@tonic-gate 
509*0Sstevel@tonic-gate 	LOCKREGION(dbmp);
510*0Sstevel@tonic-gate 
511*0Sstevel@tonic-gate 	/* If more than a single reference, simply decrement. */
512*0Sstevel@tonic-gate 	if (mfp->ref > 1) {
513*0Sstevel@tonic-gate 		--mfp->ref;
514*0Sstevel@tonic-gate 		goto ret1;
515*0Sstevel@tonic-gate 	}
516*0Sstevel@tonic-gate 
517*0Sstevel@tonic-gate 	/*
518*0Sstevel@tonic-gate 	 * Move any BH's held by the file to the free list.  We don't free the
519*0Sstevel@tonic-gate 	 * memory itself because we may be discarding the memory pool, and it's
520*0Sstevel@tonic-gate 	 * fairly expensive to reintegrate the buffers back into the region for
521*0Sstevel@tonic-gate 	 * no purpose.
522*0Sstevel@tonic-gate 	 */
523*0Sstevel@tonic-gate 	mf_offset = R_OFFSET(dbmp, mfp);
524*0Sstevel@tonic-gate 	for (bhp = SH_TAILQ_FIRST(&mp->bhq, __bh); bhp != NULL; bhp = nbhp) {
525*0Sstevel@tonic-gate 		nbhp = SH_TAILQ_NEXT(bhp, q, __bh);
526*0Sstevel@tonic-gate 
527*0Sstevel@tonic-gate #ifdef DEBUG_NO_DIRTY
528*0Sstevel@tonic-gate 		/* Complain if we find any blocks that were left dirty. */
529*0Sstevel@tonic-gate 		if (F_ISSET(bhp, BH_DIRTY))
530*0Sstevel@tonic-gate 			__db_err(dbmp->dbenv,
531*0Sstevel@tonic-gate 			    "%s: close: pgno %lu left dirty; ref %lu",
532*0Sstevel@tonic-gate 			    __memp_fn(dbmfp),
533*0Sstevel@tonic-gate 			    (u_long)bhp->pgno, (u_long)bhp->ref);
534*0Sstevel@tonic-gate #endif
535*0Sstevel@tonic-gate 
536*0Sstevel@tonic-gate 		if (bhp->mf_offset == mf_offset) {
537*0Sstevel@tonic-gate 			if (F_ISSET(bhp, BH_DIRTY)) {
538*0Sstevel@tonic-gate 				++mp->stat.st_page_clean;
539*0Sstevel@tonic-gate 				--mp->stat.st_page_dirty;
540*0Sstevel@tonic-gate 			}
541*0Sstevel@tonic-gate 			__memp_bhfree(dbmp, mfp, bhp, 0);
542*0Sstevel@tonic-gate 			SH_TAILQ_INSERT_HEAD(&mp->bhfq, bhp, q, __bh);
543*0Sstevel@tonic-gate 		}
544*0Sstevel@tonic-gate 	}
545*0Sstevel@tonic-gate 
546*0Sstevel@tonic-gate 	/* Delete from the list of MPOOLFILEs. */
547*0Sstevel@tonic-gate 	SH_TAILQ_REMOVE(&mp->mpfq, mfp, q, __mpoolfile);
548*0Sstevel@tonic-gate 
549*0Sstevel@tonic-gate 	/* Free the space. */
550*0Sstevel@tonic-gate 	if (mfp->path_off != 0)
551*0Sstevel@tonic-gate 		__db_shalloc_free(dbmp->addr, R_ADDR(dbmp, mfp->path_off));
552*0Sstevel@tonic-gate 	if (mfp->fileid_off != 0)
553*0Sstevel@tonic-gate 		__db_shalloc_free(dbmp->addr, R_ADDR(dbmp, mfp->fileid_off));
554*0Sstevel@tonic-gate 	if (mfp->pgcookie_off != 0)
555*0Sstevel@tonic-gate 		__db_shalloc_free(dbmp->addr, R_ADDR(dbmp, mfp->pgcookie_off));
556*0Sstevel@tonic-gate 	__db_shalloc_free(dbmp->addr, mfp);
557*0Sstevel@tonic-gate 
558*0Sstevel@tonic-gate ret1:	UNLOCKREGION(dbmp);
559*0Sstevel@tonic-gate 	return (0);
560*0Sstevel@tonic-gate }
561