1*0Sstevel@tonic-gate /*- 2*0Sstevel@tonic-gate * See the file LICENSE for redistribution information. 3*0Sstevel@tonic-gate * 4*0Sstevel@tonic-gate * Copyright (c) 1996, 1997, 1998 5*0Sstevel@tonic-gate * Sleepycat Software. All rights reserved. 6*0Sstevel@tonic-gate */ 7*0Sstevel@tonic-gate #include "config.h" 8*0Sstevel@tonic-gate 9*0Sstevel@tonic-gate #ifndef lint 10*0Sstevel@tonic-gate static const char sccsid[] = "@(#)mp_fopen.c 10.60 (Sleepycat) 1/1/99"; 11*0Sstevel@tonic-gate #endif /* not lint */ 12*0Sstevel@tonic-gate 13*0Sstevel@tonic-gate #ifndef NO_SYSTEM_INCLUDES 14*0Sstevel@tonic-gate #include <sys/types.h> 15*0Sstevel@tonic-gate 16*0Sstevel@tonic-gate #include <errno.h> 17*0Sstevel@tonic-gate #include <string.h> 18*0Sstevel@tonic-gate #endif 19*0Sstevel@tonic-gate 20*0Sstevel@tonic-gate #include "db_int.h" 21*0Sstevel@tonic-gate #include "shqueue.h" 22*0Sstevel@tonic-gate #include "db_shash.h" 23*0Sstevel@tonic-gate #include "mp.h" 24*0Sstevel@tonic-gate #include "common_ext.h" 25*0Sstevel@tonic-gate 26*0Sstevel@tonic-gate static int __memp_mf_close __P((DB_MPOOL *, DB_MPOOLFILE *)); 27*0Sstevel@tonic-gate static int __memp_mf_open __P((DB_MPOOL *, 28*0Sstevel@tonic-gate const char *, size_t, db_pgno_t, DB_MPOOL_FINFO *, MPOOLFILE **)); 29*0Sstevel@tonic-gate 30*0Sstevel@tonic-gate /* 31*0Sstevel@tonic-gate * memp_fopen -- 32*0Sstevel@tonic-gate * Open a backing file for the memory pool. 33*0Sstevel@tonic-gate */ 34*0Sstevel@tonic-gate int 35*0Sstevel@tonic-gate memp_fopen(dbmp, path, flags, mode, pagesize, finfop, retp) 36*0Sstevel@tonic-gate DB_MPOOL *dbmp; 37*0Sstevel@tonic-gate const char *path; 38*0Sstevel@tonic-gate u_int32_t flags; 39*0Sstevel@tonic-gate int mode; 40*0Sstevel@tonic-gate size_t pagesize; 41*0Sstevel@tonic-gate DB_MPOOL_FINFO *finfop; 42*0Sstevel@tonic-gate DB_MPOOLFILE **retp; 43*0Sstevel@tonic-gate { 44*0Sstevel@tonic-gate int ret; 45*0Sstevel@tonic-gate 46*0Sstevel@tonic-gate MP_PANIC_CHECK(dbmp); 47*0Sstevel@tonic-gate 48*0Sstevel@tonic-gate /* Validate arguments. */ 49*0Sstevel@tonic-gate if ((ret = __db_fchk(dbmp->dbenv, 50*0Sstevel@tonic-gate "memp_fopen", flags, DB_CREATE | DB_NOMMAP | DB_RDONLY)) != 0) 51*0Sstevel@tonic-gate return (ret); 52*0Sstevel@tonic-gate 53*0Sstevel@tonic-gate /* Require a non-zero pagesize. */ 54*0Sstevel@tonic-gate if (pagesize == 0) { 55*0Sstevel@tonic-gate __db_err(dbmp->dbenv, "memp_fopen: pagesize not specified"); 56*0Sstevel@tonic-gate return (EINVAL); 57*0Sstevel@tonic-gate } 58*0Sstevel@tonic-gate if (finfop != NULL && finfop->clear_len > pagesize) 59*0Sstevel@tonic-gate return (EINVAL); 60*0Sstevel@tonic-gate 61*0Sstevel@tonic-gate return (__memp_fopen(dbmp, 62*0Sstevel@tonic-gate NULL, path, flags, mode, pagesize, 1, finfop, retp)); 63*0Sstevel@tonic-gate } 64*0Sstevel@tonic-gate 65*0Sstevel@tonic-gate /* 66*0Sstevel@tonic-gate * __memp_fopen -- 67*0Sstevel@tonic-gate * Open a backing file for the memory pool; internal version. 68*0Sstevel@tonic-gate * 69*0Sstevel@tonic-gate * PUBLIC: int __memp_fopen __P((DB_MPOOL *, MPOOLFILE *, const char *, 70*0Sstevel@tonic-gate * PUBLIC: u_int32_t, int, size_t, int, DB_MPOOL_FINFO *, DB_MPOOLFILE **)); 71*0Sstevel@tonic-gate */ 72*0Sstevel@tonic-gate int 73*0Sstevel@tonic-gate __memp_fopen(dbmp, mfp, path, flags, mode, pagesize, needlock, finfop, retp) 74*0Sstevel@tonic-gate DB_MPOOL *dbmp; 75*0Sstevel@tonic-gate MPOOLFILE *mfp; 76*0Sstevel@tonic-gate const char *path; 77*0Sstevel@tonic-gate u_int32_t flags; 78*0Sstevel@tonic-gate int mode, needlock; 79*0Sstevel@tonic-gate size_t pagesize; 80*0Sstevel@tonic-gate DB_MPOOL_FINFO *finfop; 81*0Sstevel@tonic-gate DB_MPOOLFILE **retp; 82*0Sstevel@tonic-gate { 83*0Sstevel@tonic-gate DB_ENV *dbenv; 84*0Sstevel@tonic-gate DB_MPOOLFILE *dbmfp; 85*0Sstevel@tonic-gate DB_MPOOL_FINFO finfo; 86*0Sstevel@tonic-gate db_pgno_t last_pgno; 87*0Sstevel@tonic-gate size_t maxmap; 88*0Sstevel@tonic-gate u_int32_t mbytes, bytes; 89*0Sstevel@tonic-gate int ret; 90*0Sstevel@tonic-gate u_int8_t idbuf[DB_FILE_ID_LEN]; 91*0Sstevel@tonic-gate char *rpath; 92*0Sstevel@tonic-gate 93*0Sstevel@tonic-gate dbenv = dbmp->dbenv; 94*0Sstevel@tonic-gate ret = 0; 95*0Sstevel@tonic-gate rpath = NULL; 96*0Sstevel@tonic-gate 97*0Sstevel@tonic-gate /* 98*0Sstevel@tonic-gate * If mfp is provided, we take the DB_MPOOL_FINFO information from 99*0Sstevel@tonic-gate * the mfp. We don't bother initializing everything, because some 100*0Sstevel@tonic-gate * of them are expensive to acquire. If no mfp is provided and the 101*0Sstevel@tonic-gate * finfop argument is NULL, we default the values. 102*0Sstevel@tonic-gate */ 103*0Sstevel@tonic-gate if (finfop == NULL) { 104*0Sstevel@tonic-gate memset(&finfo, 0, sizeof(finfo)); 105*0Sstevel@tonic-gate if (mfp != NULL) { 106*0Sstevel@tonic-gate finfo.ftype = mfp->ftype; 107*0Sstevel@tonic-gate finfo.pgcookie = NULL; 108*0Sstevel@tonic-gate finfo.fileid = NULL; 109*0Sstevel@tonic-gate finfo.lsn_offset = mfp->lsn_off; 110*0Sstevel@tonic-gate finfo.clear_len = mfp->clear_len; 111*0Sstevel@tonic-gate } else { 112*0Sstevel@tonic-gate finfo.ftype = 0; 113*0Sstevel@tonic-gate finfo.pgcookie = NULL; 114*0Sstevel@tonic-gate finfo.fileid = NULL; 115*0Sstevel@tonic-gate finfo.lsn_offset = -1; 116*0Sstevel@tonic-gate finfo.clear_len = 0; 117*0Sstevel@tonic-gate } 118*0Sstevel@tonic-gate finfop = &finfo; 119*0Sstevel@tonic-gate } 120*0Sstevel@tonic-gate 121*0Sstevel@tonic-gate /* Allocate and initialize the per-process structure. */ 122*0Sstevel@tonic-gate if ((ret = __os_calloc(1, sizeof(DB_MPOOLFILE), &dbmfp)) != 0) 123*0Sstevel@tonic-gate return (ret); 124*0Sstevel@tonic-gate dbmfp->dbmp = dbmp; 125*0Sstevel@tonic-gate dbmfp->fd = -1; 126*0Sstevel@tonic-gate dbmfp->ref = 1; 127*0Sstevel@tonic-gate if (LF_ISSET(DB_RDONLY)) 128*0Sstevel@tonic-gate F_SET(dbmfp, MP_READONLY); 129*0Sstevel@tonic-gate 130*0Sstevel@tonic-gate if (path == NULL) { 131*0Sstevel@tonic-gate if (LF_ISSET(DB_RDONLY)) { 132*0Sstevel@tonic-gate __db_err(dbenv, 133*0Sstevel@tonic-gate "memp_fopen: temporary files can't be readonly"); 134*0Sstevel@tonic-gate ret = EINVAL; 135*0Sstevel@tonic-gate goto err; 136*0Sstevel@tonic-gate } 137*0Sstevel@tonic-gate last_pgno = 0; 138*0Sstevel@tonic-gate } else { 139*0Sstevel@tonic-gate /* Get the real name for this file and open it. */ 140*0Sstevel@tonic-gate if ((ret = __db_appname(dbenv, 141*0Sstevel@tonic-gate DB_APP_DATA, NULL, path, 0, NULL, &rpath)) != 0) 142*0Sstevel@tonic-gate goto err; 143*0Sstevel@tonic-gate if ((ret = __db_open(rpath, 144*0Sstevel@tonic-gate LF_ISSET(DB_CREATE | DB_RDONLY), 145*0Sstevel@tonic-gate DB_CREATE | DB_RDONLY, mode, &dbmfp->fd)) != 0) { 146*0Sstevel@tonic-gate __db_err(dbenv, "%s: %s", rpath, strerror(ret)); 147*0Sstevel@tonic-gate goto err; 148*0Sstevel@tonic-gate } 149*0Sstevel@tonic-gate 150*0Sstevel@tonic-gate /* 151*0Sstevel@tonic-gate * Don't permit files that aren't a multiple of the pagesize, 152*0Sstevel@tonic-gate * and find the number of the last page in the file, all the 153*0Sstevel@tonic-gate * time being careful not to overflow 32 bits. 154*0Sstevel@tonic-gate * 155*0Sstevel@tonic-gate * !!! 156*0Sstevel@tonic-gate * We can't use off_t's here, or in any code in the mainline 157*0Sstevel@tonic-gate * library for that matter. (We have to use them in the os 158*0Sstevel@tonic-gate * stubs, of course, as there are system calls that take them 159*0Sstevel@tonic-gate * as arguments.) The reason is that some customers build in 160*0Sstevel@tonic-gate * environments where an off_t is 32-bits, but still run where 161*0Sstevel@tonic-gate * offsets are 64-bits, and they pay us a lot of money. 162*0Sstevel@tonic-gate */ 163*0Sstevel@tonic-gate if ((ret = __os_ioinfo(rpath, 164*0Sstevel@tonic-gate dbmfp->fd, &mbytes, &bytes, NULL)) != 0) { 165*0Sstevel@tonic-gate __db_err(dbenv, "%s: %s", rpath, strerror(ret)); 166*0Sstevel@tonic-gate goto err; 167*0Sstevel@tonic-gate } 168*0Sstevel@tonic-gate 169*0Sstevel@tonic-gate /* Page sizes have to be a power-of-two, ignore mbytes. */ 170*0Sstevel@tonic-gate if (bytes % pagesize != 0) { 171*0Sstevel@tonic-gate __db_err(dbenv, 172*0Sstevel@tonic-gate "%s: file size not a multiple of the pagesize", 173*0Sstevel@tonic-gate rpath); 174*0Sstevel@tonic-gate ret = EINVAL; 175*0Sstevel@tonic-gate goto err; 176*0Sstevel@tonic-gate } 177*0Sstevel@tonic-gate 178*0Sstevel@tonic-gate last_pgno = mbytes * (MEGABYTE / pagesize); 179*0Sstevel@tonic-gate last_pgno += bytes / pagesize; 180*0Sstevel@tonic-gate 181*0Sstevel@tonic-gate /* Correction: page numbers are zero-based, not 1-based. */ 182*0Sstevel@tonic-gate if (last_pgno != 0) 183*0Sstevel@tonic-gate --last_pgno; 184*0Sstevel@tonic-gate 185*0Sstevel@tonic-gate /* 186*0Sstevel@tonic-gate * Get the file id if we weren't given one. Generated file id's 187*0Sstevel@tonic-gate * don't use timestamps, otherwise there'd be no chance of any 188*0Sstevel@tonic-gate * other process joining the party. 189*0Sstevel@tonic-gate */ 190*0Sstevel@tonic-gate if (finfop->fileid == NULL) { 191*0Sstevel@tonic-gate if ((ret = __os_fileid(dbenv, rpath, 0, idbuf)) != 0) 192*0Sstevel@tonic-gate goto err; 193*0Sstevel@tonic-gate finfop->fileid = idbuf; 194*0Sstevel@tonic-gate } 195*0Sstevel@tonic-gate } 196*0Sstevel@tonic-gate 197*0Sstevel@tonic-gate /* 198*0Sstevel@tonic-gate * If we weren't provided an underlying shared object to join with, 199*0Sstevel@tonic-gate * find/allocate the shared file objects. Also allocate space for 200*0Sstevel@tonic-gate * for the per-process thread lock. 201*0Sstevel@tonic-gate */ 202*0Sstevel@tonic-gate if (needlock) 203*0Sstevel@tonic-gate LOCKREGION(dbmp); 204*0Sstevel@tonic-gate 205*0Sstevel@tonic-gate if (mfp == NULL) 206*0Sstevel@tonic-gate ret = __memp_mf_open(dbmp, 207*0Sstevel@tonic-gate path, pagesize, last_pgno, finfop, &mfp); 208*0Sstevel@tonic-gate else { 209*0Sstevel@tonic-gate ++mfp->ref; 210*0Sstevel@tonic-gate ret = 0; 211*0Sstevel@tonic-gate } 212*0Sstevel@tonic-gate if (ret == 0 && 213*0Sstevel@tonic-gate F_ISSET(dbmp, MP_LOCKHANDLE) && (ret = 214*0Sstevel@tonic-gate __memp_alloc(dbmp, sizeof(db_mutex_t), NULL, &dbmfp->mutexp)) == 0) 215*0Sstevel@tonic-gate LOCKINIT(dbmp, dbmfp->mutexp); 216*0Sstevel@tonic-gate 217*0Sstevel@tonic-gate if (needlock) 218*0Sstevel@tonic-gate UNLOCKREGION(dbmp); 219*0Sstevel@tonic-gate if (ret != 0) 220*0Sstevel@tonic-gate goto err; 221*0Sstevel@tonic-gate 222*0Sstevel@tonic-gate dbmfp->mfp = mfp; 223*0Sstevel@tonic-gate 224*0Sstevel@tonic-gate /* 225*0Sstevel@tonic-gate * If a file: 226*0Sstevel@tonic-gate * + is read-only 227*0Sstevel@tonic-gate * + isn't temporary 228*0Sstevel@tonic-gate * + doesn't require any pgin/pgout support 229*0Sstevel@tonic-gate * + the DB_NOMMAP flag wasn't set 230*0Sstevel@tonic-gate * + and is less than mp_mmapsize bytes in size 231*0Sstevel@tonic-gate * 232*0Sstevel@tonic-gate * we can mmap it instead of reading/writing buffers. Don't do error 233*0Sstevel@tonic-gate * checking based on the mmap call failure. We want to do normal I/O 234*0Sstevel@tonic-gate * on the file if the reason we failed was because the file was on an 235*0Sstevel@tonic-gate * NFS mounted partition, and we can fail in buffer I/O just as easily 236*0Sstevel@tonic-gate * as here. 237*0Sstevel@tonic-gate * 238*0Sstevel@tonic-gate * XXX 239*0Sstevel@tonic-gate * We'd like to test to see if the file is too big to mmap. Since we 240*0Sstevel@tonic-gate * don't know what size or type off_t's or size_t's are, or the largest 241*0Sstevel@tonic-gate * unsigned integral type is, or what random insanity the local C 242*0Sstevel@tonic-gate * compiler will perpetrate, doing the comparison in a portable way is 243*0Sstevel@tonic-gate * flatly impossible. Hope that mmap fails if the file is too large. 244*0Sstevel@tonic-gate */ 245*0Sstevel@tonic-gate #define DB_MAXMMAPSIZE (10 * 1024 * 1024) /* 10 Mb. */ 246*0Sstevel@tonic-gate if (F_ISSET(mfp, MP_CAN_MMAP)) { 247*0Sstevel@tonic-gate if (!F_ISSET(dbmfp, MP_READONLY)) 248*0Sstevel@tonic-gate F_CLR(mfp, MP_CAN_MMAP); 249*0Sstevel@tonic-gate if (path == NULL) 250*0Sstevel@tonic-gate F_CLR(mfp, MP_CAN_MMAP); 251*0Sstevel@tonic-gate if (finfop->ftype != 0) 252*0Sstevel@tonic-gate F_CLR(mfp, MP_CAN_MMAP); 253*0Sstevel@tonic-gate if (LF_ISSET(DB_NOMMAP)) 254*0Sstevel@tonic-gate F_CLR(mfp, MP_CAN_MMAP); 255*0Sstevel@tonic-gate maxmap = dbenv == NULL || dbenv->mp_mmapsize == 0 ? 256*0Sstevel@tonic-gate DB_MAXMMAPSIZE : dbenv->mp_mmapsize; 257*0Sstevel@tonic-gate if (mbytes > maxmap / MEGABYTE || 258*0Sstevel@tonic-gate (mbytes == maxmap / MEGABYTE && bytes >= maxmap % MEGABYTE)) 259*0Sstevel@tonic-gate F_CLR(mfp, MP_CAN_MMAP); 260*0Sstevel@tonic-gate } 261*0Sstevel@tonic-gate dbmfp->addr = NULL; 262*0Sstevel@tonic-gate if (F_ISSET(mfp, MP_CAN_MMAP)) { 263*0Sstevel@tonic-gate dbmfp->len = (size_t)mbytes * MEGABYTE + bytes; 264*0Sstevel@tonic-gate if (__db_mapfile(rpath, 265*0Sstevel@tonic-gate dbmfp->fd, dbmfp->len, 1, &dbmfp->addr) != 0) { 266*0Sstevel@tonic-gate dbmfp->addr = NULL; 267*0Sstevel@tonic-gate F_CLR(mfp, MP_CAN_MMAP); 268*0Sstevel@tonic-gate } 269*0Sstevel@tonic-gate } 270*0Sstevel@tonic-gate if (rpath != NULL) 271*0Sstevel@tonic-gate __os_freestr(rpath); 272*0Sstevel@tonic-gate 273*0Sstevel@tonic-gate LOCKHANDLE(dbmp, dbmp->mutexp); 274*0Sstevel@tonic-gate TAILQ_INSERT_TAIL(&dbmp->dbmfq, dbmfp, q); 275*0Sstevel@tonic-gate UNLOCKHANDLE(dbmp, dbmp->mutexp); 276*0Sstevel@tonic-gate 277*0Sstevel@tonic-gate *retp = dbmfp; 278*0Sstevel@tonic-gate return (0); 279*0Sstevel@tonic-gate 280*0Sstevel@tonic-gate err: /* 281*0Sstevel@tonic-gate * Note that we do not have to free the thread mutex, because we 282*0Sstevel@tonic-gate * never get to here after we have successfully allocated it. 283*0Sstevel@tonic-gate */ 284*0Sstevel@tonic-gate if (rpath != NULL) 285*0Sstevel@tonic-gate __os_freestr(rpath); 286*0Sstevel@tonic-gate if (dbmfp->fd != -1) 287*0Sstevel@tonic-gate (void)__os_close(dbmfp->fd); 288*0Sstevel@tonic-gate if (dbmfp != NULL) 289*0Sstevel@tonic-gate __os_free(dbmfp, sizeof(DB_MPOOLFILE)); 290*0Sstevel@tonic-gate return (ret); 291*0Sstevel@tonic-gate } 292*0Sstevel@tonic-gate 293*0Sstevel@tonic-gate /* 294*0Sstevel@tonic-gate * __memp_mf_open -- 295*0Sstevel@tonic-gate * Open an MPOOLFILE. 296*0Sstevel@tonic-gate */ 297*0Sstevel@tonic-gate static int 298*0Sstevel@tonic-gate __memp_mf_open(dbmp, path, pagesize, last_pgno, finfop, retp) 299*0Sstevel@tonic-gate DB_MPOOL *dbmp; 300*0Sstevel@tonic-gate const char *path; 301*0Sstevel@tonic-gate size_t pagesize; 302*0Sstevel@tonic-gate db_pgno_t last_pgno; 303*0Sstevel@tonic-gate DB_MPOOL_FINFO *finfop; 304*0Sstevel@tonic-gate MPOOLFILE **retp; 305*0Sstevel@tonic-gate { 306*0Sstevel@tonic-gate MPOOLFILE *mfp; 307*0Sstevel@tonic-gate int ret; 308*0Sstevel@tonic-gate void *p; 309*0Sstevel@tonic-gate 310*0Sstevel@tonic-gate #define ISTEMPORARY (path == NULL) 311*0Sstevel@tonic-gate 312*0Sstevel@tonic-gate /* 313*0Sstevel@tonic-gate * Walk the list of MPOOLFILE's, looking for a matching file. 314*0Sstevel@tonic-gate * Temporary files can't match previous files. 315*0Sstevel@tonic-gate */ 316*0Sstevel@tonic-gate if (!ISTEMPORARY) 317*0Sstevel@tonic-gate for (mfp = SH_TAILQ_FIRST(&dbmp->mp->mpfq, __mpoolfile); 318*0Sstevel@tonic-gate mfp != NULL; mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile)) { 319*0Sstevel@tonic-gate if (F_ISSET(mfp, MP_TEMP)) 320*0Sstevel@tonic-gate continue; 321*0Sstevel@tonic-gate if (!memcmp(finfop->fileid, 322*0Sstevel@tonic-gate R_ADDR(dbmp, mfp->fileid_off), DB_FILE_ID_LEN)) { 323*0Sstevel@tonic-gate if (finfop->clear_len != mfp->clear_len || 324*0Sstevel@tonic-gate finfop->ftype != mfp->ftype || 325*0Sstevel@tonic-gate pagesize != mfp->stat.st_pagesize) { 326*0Sstevel@tonic-gate __db_err(dbmp->dbenv, 327*0Sstevel@tonic-gate "%s: ftype, clear length or pagesize changed", 328*0Sstevel@tonic-gate path); 329*0Sstevel@tonic-gate return (EINVAL); 330*0Sstevel@tonic-gate } 331*0Sstevel@tonic-gate 332*0Sstevel@tonic-gate /* Found it: increment the reference count. */ 333*0Sstevel@tonic-gate ++mfp->ref; 334*0Sstevel@tonic-gate *retp = mfp; 335*0Sstevel@tonic-gate return (0); 336*0Sstevel@tonic-gate } 337*0Sstevel@tonic-gate } 338*0Sstevel@tonic-gate 339*0Sstevel@tonic-gate /* Allocate a new MPOOLFILE. */ 340*0Sstevel@tonic-gate if ((ret = __memp_alloc(dbmp, sizeof(MPOOLFILE), NULL, &mfp)) != 0) 341*0Sstevel@tonic-gate return (ret); 342*0Sstevel@tonic-gate *retp = mfp; 343*0Sstevel@tonic-gate 344*0Sstevel@tonic-gate /* Initialize the structure. */ 345*0Sstevel@tonic-gate memset(mfp, 0, sizeof(MPOOLFILE)); 346*0Sstevel@tonic-gate mfp->ref = 1; 347*0Sstevel@tonic-gate mfp->ftype = finfop->ftype; 348*0Sstevel@tonic-gate mfp->lsn_off = finfop->lsn_offset; 349*0Sstevel@tonic-gate mfp->clear_len = finfop->clear_len; 350*0Sstevel@tonic-gate 351*0Sstevel@tonic-gate /* 352*0Sstevel@tonic-gate * If the user specifies DB_MPOOL_LAST or DB_MPOOL_NEW on a memp_fget, 353*0Sstevel@tonic-gate * we have to know the last page in the file. Figure it out and save 354*0Sstevel@tonic-gate * it away. 355*0Sstevel@tonic-gate */ 356*0Sstevel@tonic-gate mfp->stat.st_pagesize = pagesize; 357*0Sstevel@tonic-gate mfp->orig_last_pgno = mfp->last_pgno = last_pgno; 358*0Sstevel@tonic-gate 359*0Sstevel@tonic-gate if (ISTEMPORARY) 360*0Sstevel@tonic-gate F_SET(mfp, MP_TEMP); 361*0Sstevel@tonic-gate else { 362*0Sstevel@tonic-gate /* Copy the file path into shared memory. */ 363*0Sstevel@tonic-gate if ((ret = __memp_alloc(dbmp, 364*0Sstevel@tonic-gate strlen(path) + 1, &mfp->path_off, &p)) != 0) 365*0Sstevel@tonic-gate goto err; 366*0Sstevel@tonic-gate memcpy(p, path, strlen(path) + 1); 367*0Sstevel@tonic-gate 368*0Sstevel@tonic-gate /* Copy the file identification string into shared memory. */ 369*0Sstevel@tonic-gate if ((ret = __memp_alloc(dbmp, 370*0Sstevel@tonic-gate DB_FILE_ID_LEN, &mfp->fileid_off, &p)) != 0) 371*0Sstevel@tonic-gate goto err; 372*0Sstevel@tonic-gate memcpy(p, finfop->fileid, DB_FILE_ID_LEN); 373*0Sstevel@tonic-gate 374*0Sstevel@tonic-gate F_SET(mfp, MP_CAN_MMAP); 375*0Sstevel@tonic-gate } 376*0Sstevel@tonic-gate 377*0Sstevel@tonic-gate /* Copy the page cookie into shared memory. */ 378*0Sstevel@tonic-gate if (finfop->pgcookie == NULL || finfop->pgcookie->size == 0) { 379*0Sstevel@tonic-gate mfp->pgcookie_len = 0; 380*0Sstevel@tonic-gate mfp->pgcookie_off = 0; 381*0Sstevel@tonic-gate } else { 382*0Sstevel@tonic-gate if ((ret = __memp_alloc(dbmp, 383*0Sstevel@tonic-gate finfop->pgcookie->size, &mfp->pgcookie_off, &p)) != 0) 384*0Sstevel@tonic-gate goto err; 385*0Sstevel@tonic-gate memcpy(p, finfop->pgcookie->data, finfop->pgcookie->size); 386*0Sstevel@tonic-gate mfp->pgcookie_len = finfop->pgcookie->size; 387*0Sstevel@tonic-gate } 388*0Sstevel@tonic-gate 389*0Sstevel@tonic-gate /* Prepend the MPOOLFILE to the list of MPOOLFILE's. */ 390*0Sstevel@tonic-gate SH_TAILQ_INSERT_HEAD(&dbmp->mp->mpfq, mfp, q, __mpoolfile); 391*0Sstevel@tonic-gate 392*0Sstevel@tonic-gate if (0) { 393*0Sstevel@tonic-gate err: if (mfp->path_off != 0) 394*0Sstevel@tonic-gate __db_shalloc_free(dbmp->addr, 395*0Sstevel@tonic-gate R_ADDR(dbmp, mfp->path_off)); 396*0Sstevel@tonic-gate if (mfp->fileid_off != 0) 397*0Sstevel@tonic-gate __db_shalloc_free(dbmp->addr, 398*0Sstevel@tonic-gate R_ADDR(dbmp, mfp->fileid_off)); 399*0Sstevel@tonic-gate if (mfp != NULL) 400*0Sstevel@tonic-gate __db_shalloc_free(dbmp->addr, mfp); 401*0Sstevel@tonic-gate mfp = NULL; 402*0Sstevel@tonic-gate } 403*0Sstevel@tonic-gate return (0); 404*0Sstevel@tonic-gate } 405*0Sstevel@tonic-gate 406*0Sstevel@tonic-gate /* 407*0Sstevel@tonic-gate * memp_fclose -- 408*0Sstevel@tonic-gate * Close a backing file for the memory pool. 409*0Sstevel@tonic-gate */ 410*0Sstevel@tonic-gate int 411*0Sstevel@tonic-gate memp_fclose(dbmfp) 412*0Sstevel@tonic-gate DB_MPOOLFILE *dbmfp; 413*0Sstevel@tonic-gate { 414*0Sstevel@tonic-gate DB_MPOOL *dbmp; 415*0Sstevel@tonic-gate int ret, t_ret; 416*0Sstevel@tonic-gate 417*0Sstevel@tonic-gate dbmp = dbmfp->dbmp; 418*0Sstevel@tonic-gate ret = 0; 419*0Sstevel@tonic-gate 420*0Sstevel@tonic-gate MP_PANIC_CHECK(dbmp); 421*0Sstevel@tonic-gate 422*0Sstevel@tonic-gate for (;;) { 423*0Sstevel@tonic-gate LOCKHANDLE(dbmp, dbmp->mutexp); 424*0Sstevel@tonic-gate 425*0Sstevel@tonic-gate /* 426*0Sstevel@tonic-gate * We have to reference count DB_MPOOLFILE structures as other 427*0Sstevel@tonic-gate * threads may be using them. The problem only happens if the 428*0Sstevel@tonic-gate * application makes a bad design choice. Here's the path: 429*0Sstevel@tonic-gate * 430*0Sstevel@tonic-gate * Thread A opens a database. 431*0Sstevel@tonic-gate * Thread B uses thread A's DB_MPOOLFILE to write a buffer 432*0Sstevel@tonic-gate * in order to free up memory in the mpool cache. 433*0Sstevel@tonic-gate * Thread A closes the database while thread B is using the 434*0Sstevel@tonic-gate * DB_MPOOLFILE structure. 435*0Sstevel@tonic-gate * 436*0Sstevel@tonic-gate * By opening all databases before creating the threads, and 437*0Sstevel@tonic-gate * closing them after the threads have exited, applications 438*0Sstevel@tonic-gate * get better performance and avoid the problem path entirely. 439*0Sstevel@tonic-gate * 440*0Sstevel@tonic-gate * Regardless, holding the DB_MPOOLFILE to flush a dirty buffer 441*0Sstevel@tonic-gate * is a short-term lock, even in worst case, since we better be 442*0Sstevel@tonic-gate * the only thread of control using the DB_MPOOLFILE structure 443*0Sstevel@tonic-gate * to read pages *into* the cache. Wait until we're the only 444*0Sstevel@tonic-gate * reference holder and remove the DB_MPOOLFILE structure from 445*0Sstevel@tonic-gate * the list, so nobody else can even find it. 446*0Sstevel@tonic-gate */ 447*0Sstevel@tonic-gate if (dbmfp->ref == 1) { 448*0Sstevel@tonic-gate TAILQ_REMOVE(&dbmp->dbmfq, dbmfp, q); 449*0Sstevel@tonic-gate break; 450*0Sstevel@tonic-gate } 451*0Sstevel@tonic-gate UNLOCKHANDLE(dbmp, dbmp->mutexp); 452*0Sstevel@tonic-gate 453*0Sstevel@tonic-gate (void)__os_sleep(1, 0); 454*0Sstevel@tonic-gate } 455*0Sstevel@tonic-gate UNLOCKHANDLE(dbmp, dbmp->mutexp); 456*0Sstevel@tonic-gate 457*0Sstevel@tonic-gate /* Complain if pinned blocks never returned. */ 458*0Sstevel@tonic-gate if (dbmfp->pinref != 0) 459*0Sstevel@tonic-gate __db_err(dbmp->dbenv, "%s: close: %lu blocks left pinned", 460*0Sstevel@tonic-gate __memp_fn(dbmfp), (u_long)dbmfp->pinref); 461*0Sstevel@tonic-gate 462*0Sstevel@tonic-gate /* Close the underlying MPOOLFILE. */ 463*0Sstevel@tonic-gate (void)__memp_mf_close(dbmp, dbmfp); 464*0Sstevel@tonic-gate 465*0Sstevel@tonic-gate /* Discard any mmap information. */ 466*0Sstevel@tonic-gate if (dbmfp->addr != NULL && 467*0Sstevel@tonic-gate (ret = __db_unmapfile(dbmfp->addr, dbmfp->len)) != 0) 468*0Sstevel@tonic-gate __db_err(dbmp->dbenv, 469*0Sstevel@tonic-gate "%s: %s", __memp_fn(dbmfp), strerror(ret)); 470*0Sstevel@tonic-gate 471*0Sstevel@tonic-gate /* Close the file; temporary files may not yet have been created. */ 472*0Sstevel@tonic-gate if (dbmfp->fd != -1 && (t_ret = __os_close(dbmfp->fd)) != 0) { 473*0Sstevel@tonic-gate __db_err(dbmp->dbenv, 474*0Sstevel@tonic-gate "%s: %s", __memp_fn(dbmfp), strerror(t_ret)); 475*0Sstevel@tonic-gate if (ret != 0) 476*0Sstevel@tonic-gate t_ret = ret; 477*0Sstevel@tonic-gate } 478*0Sstevel@tonic-gate 479*0Sstevel@tonic-gate /* Free memory. */ 480*0Sstevel@tonic-gate if (dbmfp->mutexp != NULL) { 481*0Sstevel@tonic-gate LOCKREGION(dbmp); 482*0Sstevel@tonic-gate __db_shalloc_free(dbmp->addr, dbmfp->mutexp); 483*0Sstevel@tonic-gate UNLOCKREGION(dbmp); 484*0Sstevel@tonic-gate } 485*0Sstevel@tonic-gate 486*0Sstevel@tonic-gate /* Discard the DB_MPOOLFILE structure. */ 487*0Sstevel@tonic-gate __os_free(dbmfp, sizeof(DB_MPOOLFILE)); 488*0Sstevel@tonic-gate 489*0Sstevel@tonic-gate return (ret); 490*0Sstevel@tonic-gate } 491*0Sstevel@tonic-gate 492*0Sstevel@tonic-gate /* 493*0Sstevel@tonic-gate * __memp_mf_close -- 494*0Sstevel@tonic-gate * Close down an MPOOLFILE. 495*0Sstevel@tonic-gate */ 496*0Sstevel@tonic-gate static int 497*0Sstevel@tonic-gate __memp_mf_close(dbmp, dbmfp) 498*0Sstevel@tonic-gate DB_MPOOL *dbmp; 499*0Sstevel@tonic-gate DB_MPOOLFILE *dbmfp; 500*0Sstevel@tonic-gate { 501*0Sstevel@tonic-gate BH *bhp, *nbhp; 502*0Sstevel@tonic-gate MPOOL *mp; 503*0Sstevel@tonic-gate MPOOLFILE *mfp; 504*0Sstevel@tonic-gate size_t mf_offset; 505*0Sstevel@tonic-gate 506*0Sstevel@tonic-gate mp = dbmp->mp; 507*0Sstevel@tonic-gate mfp = dbmfp->mfp; 508*0Sstevel@tonic-gate 509*0Sstevel@tonic-gate LOCKREGION(dbmp); 510*0Sstevel@tonic-gate 511*0Sstevel@tonic-gate /* If more than a single reference, simply decrement. */ 512*0Sstevel@tonic-gate if (mfp->ref > 1) { 513*0Sstevel@tonic-gate --mfp->ref; 514*0Sstevel@tonic-gate goto ret1; 515*0Sstevel@tonic-gate } 516*0Sstevel@tonic-gate 517*0Sstevel@tonic-gate /* 518*0Sstevel@tonic-gate * Move any BH's held by the file to the free list. We don't free the 519*0Sstevel@tonic-gate * memory itself because we may be discarding the memory pool, and it's 520*0Sstevel@tonic-gate * fairly expensive to reintegrate the buffers back into the region for 521*0Sstevel@tonic-gate * no purpose. 522*0Sstevel@tonic-gate */ 523*0Sstevel@tonic-gate mf_offset = R_OFFSET(dbmp, mfp); 524*0Sstevel@tonic-gate for (bhp = SH_TAILQ_FIRST(&mp->bhq, __bh); bhp != NULL; bhp = nbhp) { 525*0Sstevel@tonic-gate nbhp = SH_TAILQ_NEXT(bhp, q, __bh); 526*0Sstevel@tonic-gate 527*0Sstevel@tonic-gate #ifdef DEBUG_NO_DIRTY 528*0Sstevel@tonic-gate /* Complain if we find any blocks that were left dirty. */ 529*0Sstevel@tonic-gate if (F_ISSET(bhp, BH_DIRTY)) 530*0Sstevel@tonic-gate __db_err(dbmp->dbenv, 531*0Sstevel@tonic-gate "%s: close: pgno %lu left dirty; ref %lu", 532*0Sstevel@tonic-gate __memp_fn(dbmfp), 533*0Sstevel@tonic-gate (u_long)bhp->pgno, (u_long)bhp->ref); 534*0Sstevel@tonic-gate #endif 535*0Sstevel@tonic-gate 536*0Sstevel@tonic-gate if (bhp->mf_offset == mf_offset) { 537*0Sstevel@tonic-gate if (F_ISSET(bhp, BH_DIRTY)) { 538*0Sstevel@tonic-gate ++mp->stat.st_page_clean; 539*0Sstevel@tonic-gate --mp->stat.st_page_dirty; 540*0Sstevel@tonic-gate } 541*0Sstevel@tonic-gate __memp_bhfree(dbmp, mfp, bhp, 0); 542*0Sstevel@tonic-gate SH_TAILQ_INSERT_HEAD(&mp->bhfq, bhp, q, __bh); 543*0Sstevel@tonic-gate } 544*0Sstevel@tonic-gate } 545*0Sstevel@tonic-gate 546*0Sstevel@tonic-gate /* Delete from the list of MPOOLFILEs. */ 547*0Sstevel@tonic-gate SH_TAILQ_REMOVE(&mp->mpfq, mfp, q, __mpoolfile); 548*0Sstevel@tonic-gate 549*0Sstevel@tonic-gate /* Free the space. */ 550*0Sstevel@tonic-gate if (mfp->path_off != 0) 551*0Sstevel@tonic-gate __db_shalloc_free(dbmp->addr, R_ADDR(dbmp, mfp->path_off)); 552*0Sstevel@tonic-gate if (mfp->fileid_off != 0) 553*0Sstevel@tonic-gate __db_shalloc_free(dbmp->addr, R_ADDR(dbmp, mfp->fileid_off)); 554*0Sstevel@tonic-gate if (mfp->pgcookie_off != 0) 555*0Sstevel@tonic-gate __db_shalloc_free(dbmp->addr, R_ADDR(dbmp, mfp->pgcookie_off)); 556*0Sstevel@tonic-gate __db_shalloc_free(dbmp->addr, mfp); 557*0Sstevel@tonic-gate 558*0Sstevel@tonic-gate ret1: UNLOCKREGION(dbmp); 559*0Sstevel@tonic-gate return (0); 560*0Sstevel@tonic-gate } 561