1 /*- 2 * Copyright (c) 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 */ 33 34 #if defined(LIBC_SCCS) && !defined(lint) 35 /*static char sccsid[] = "from: @(#)mpool.c 8.1 (Berkeley) 6/6/93";*/ 36 static char rcsid[] = "$Id: mpool.c,v 1.2 1993/08/01 18:43:20 mycroft Exp $"; 37 #endif /* LIBC_SCCS and not lint */ 38 39 #include <sys/param.h> 40 #include <sys/stat.h> 41 42 #include <errno.h> 43 #include <stdio.h> 44 #include <stdlib.h> 45 #include <string.h> 46 #include <unistd.h> 47 48 #include <db.h> 49 #define __MPOOLINTERFACE_PRIVATE 50 #include "mpool.h" 51 52 static BKT *mpool_bkt __P((MPOOL *)); 53 static BKT *mpool_look __P((MPOOL *, pgno_t)); 54 static int mpool_write __P((MPOOL *, BKT *)); 55 #ifdef DEBUG 56 static void __mpoolerr __P((const char *fmt, ...)); 57 #endif 58 59 /* 60 * MPOOL_OPEN -- initialize a memory pool. 61 * 62 * Parameters: 63 * key: Shared buffer key. 64 * fd: File descriptor. 65 * pagesize: File page size. 66 * maxcache: Max number of cached pages. 67 * 68 * Returns: 69 * MPOOL pointer, NULL on error. 70 */ 71 MPOOL * 72 mpool_open(key, fd, pagesize, maxcache) 73 DBT *key; 74 int fd; 75 pgno_t pagesize, maxcache; 76 { 77 struct stat sb; 78 MPOOL *mp; 79 int entry; 80 81 if (fstat(fd, &sb)) 82 return (NULL); 83 /* XXX 84 * We should only set st_size to 0 for pipes -- 4.4BSD has the fix so 85 * that stat(2) returns true for ISSOCK on pipes. Until then, this is 86 * fairly close. 87 */ 88 if (!S_ISREG(sb.st_mode)) { 89 errno = ESPIPE; 90 return (NULL); 91 } 92 93 if ((mp = malloc(sizeof(MPOOL))) == NULL) 94 return (NULL); 95 mp->free.cnext = mp->free.cprev = (BKT *)&mp->free; 96 mp->lru.cnext = mp->lru.cprev = (BKT *)&mp->lru; 97 for (entry = 0; entry < HASHSIZE; ++entry) 98 mp->hashtable[entry].hnext = mp->hashtable[entry].hprev = 99 mp->hashtable[entry].cnext = mp->hashtable[entry].cprev = 100 (BKT *)&mp->hashtable[entry]; 101 mp->curcache = 0; 102 mp->maxcache = maxcache; 103 mp->pagesize = pagesize; 104 mp->npages = sb.st_size / pagesize; 105 mp->fd = fd; 106 mp->pgcookie = NULL; 107 mp->pgin = mp->pgout = NULL; 108 109 #ifdef STATISTICS 110 mp->cachehit = mp->cachemiss = mp->pagealloc = mp->pageflush = 111 mp->pageget = mp->pagenew = mp->pageput = mp->pageread = 112 mp->pagewrite = 0; 113 #endif 114 return (mp); 115 } 116 117 /* 118 * MPOOL_FILTER -- initialize input/output filters. 119 * 120 * Parameters: 121 * pgin: Page in conversion routine. 122 * pgout: Page out conversion routine. 123 * pgcookie: Cookie for page in/out routines. 124 */ 125 void 126 mpool_filter(mp, pgin, pgout, pgcookie) 127 MPOOL *mp; 128 void (*pgin) __P((void *, pgno_t, void *)); 129 void (*pgout) __P((void *, pgno_t, void *)); 130 void *pgcookie; 131 { 132 mp->pgin = pgin; 133 mp->pgout = pgout; 134 mp->pgcookie = pgcookie; 135 } 136 137 /* 138 * MPOOL_NEW -- get a new page 139 * 140 * Parameters: 141 * mp: mpool cookie 142 * pgnoadddr: place to store new page number 143 * Returns: 144 * RET_ERROR, RET_SUCCESS 145 */ 146 void * 147 mpool_new(mp, pgnoaddr) 148 MPOOL *mp; 149 pgno_t *pgnoaddr; 150 { 151 BKT *b; 152 BKTHDR *hp; 153 154 #ifdef STATISTICS 155 ++mp->pagenew; 156 #endif 157 /* 158 * Get a BKT from the cache. Assign a new page number, attach it to 159 * the hash and lru chains and return. 160 */ 161 if ((b = mpool_bkt(mp)) == NULL) 162 return (NULL); 163 *pgnoaddr = b->pgno = mp->npages++; 164 b->flags = MPOOL_PINNED; 165 inshash(b, b->pgno); 166 inschain(b, &mp->lru); 167 return (b->page); 168 } 169 170 /* 171 * MPOOL_GET -- get a page from the pool 172 * 173 * Parameters: 174 * mp: mpool cookie 175 * pgno: page number 176 * flags: not used 177 * 178 * Returns: 179 * RET_ERROR, RET_SUCCESS 180 */ 181 void * 182 mpool_get(mp, pgno, flags) 183 MPOOL *mp; 184 pgno_t pgno; 185 u_int flags; /* XXX not used? */ 186 { 187 BKT *b; 188 BKTHDR *hp; 189 off_t off; 190 int nr; 191 192 /* 193 * If asking for a specific page that is already in the cache, find 194 * it and return it. 195 */ 196 if (b = mpool_look(mp, pgno)) { 197 #ifdef STATISTICS 198 ++mp->pageget; 199 #endif 200 #ifdef DEBUG 201 if (b->flags & MPOOL_PINNED) 202 __mpoolerr("mpool_get: page %d already pinned", 203 b->pgno); 204 #endif 205 rmchain(b); 206 inschain(b, &mp->lru); 207 b->flags |= MPOOL_PINNED; 208 return (b->page); 209 } 210 211 /* Not allowed to retrieve a non-existent page. */ 212 if (pgno >= mp->npages) { 213 errno = EINVAL; 214 return (NULL); 215 } 216 217 /* Get a page from the cache. */ 218 if ((b = mpool_bkt(mp)) == NULL) 219 return (NULL); 220 b->pgno = pgno; 221 b->flags = MPOOL_PINNED; 222 223 #ifdef STATISTICS 224 ++mp->pageread; 225 #endif 226 /* Read in the contents. */ 227 off = mp->pagesize * pgno; 228 if (lseek(mp->fd, off, SEEK_SET) != off) 229 return (NULL); 230 if ((nr = read(mp->fd, b->page, mp->pagesize)) != mp->pagesize) { 231 if (nr >= 0) 232 errno = EFTYPE; 233 return (NULL); 234 } 235 if (mp->pgin) 236 (mp->pgin)(mp->pgcookie, b->pgno, b->page); 237 238 inshash(b, b->pgno); 239 inschain(b, &mp->lru); 240 #ifdef STATISTICS 241 ++mp->pageget; 242 #endif 243 return (b->page); 244 } 245 246 /* 247 * MPOOL_PUT -- return a page to the pool 248 * 249 * Parameters: 250 * mp: mpool cookie 251 * page: page pointer 252 * pgno: page number 253 * 254 * Returns: 255 * RET_ERROR, RET_SUCCESS 256 */ 257 int 258 mpool_put(mp, page, flags) 259 MPOOL *mp; 260 void *page; 261 u_int flags; 262 { 263 BKT *baddr; 264 #ifdef DEBUG 265 BKT *b; 266 #endif 267 268 #ifdef STATISTICS 269 ++mp->pageput; 270 #endif 271 baddr = (BKT *)((char *)page - sizeof(BKT)); 272 #ifdef DEBUG 273 if (!(baddr->flags & MPOOL_PINNED)) 274 __mpoolerr("mpool_put: page %d not pinned", b->pgno); 275 for (b = mp->lru.cnext; b != (BKT *)&mp->lru; b = b->cnext) { 276 if (b == (BKT *)&mp->lru) 277 __mpoolerr("mpool_put: %0x: bad address", baddr); 278 if (b == baddr) 279 break; 280 } 281 #endif 282 baddr->flags &= ~MPOOL_PINNED; 283 baddr->flags |= flags & MPOOL_DIRTY; 284 return (RET_SUCCESS); 285 } 286 287 /* 288 * MPOOL_CLOSE -- close the buffer pool 289 * 290 * Parameters: 291 * mp: mpool cookie 292 * 293 * Returns: 294 * RET_ERROR, RET_SUCCESS 295 */ 296 int 297 mpool_close(mp) 298 MPOOL *mp; 299 { 300 BKT *b, *next; 301 302 /* Free up any space allocated to the lru pages. */ 303 for (b = mp->lru.cprev; b != (BKT *)&mp->lru; b = next) { 304 next = b->cprev; 305 free(b); 306 } 307 free(mp); 308 return (RET_SUCCESS); 309 } 310 311 /* 312 * MPOOL_SYNC -- sync the file to disk. 313 * 314 * Parameters: 315 * mp: mpool cookie 316 * 317 * Returns: 318 * RET_ERROR, RET_SUCCESS 319 */ 320 int 321 mpool_sync(mp) 322 MPOOL *mp; 323 { 324 BKT *b; 325 326 for (b = mp->lru.cprev; b != (BKT *)&mp->lru; b = b->cprev) 327 if (b->flags & MPOOL_DIRTY && mpool_write(mp, b) == RET_ERROR) 328 return (RET_ERROR); 329 return (fsync(mp->fd) ? RET_ERROR : RET_SUCCESS); 330 } 331 332 /* 333 * MPOOL_BKT -- get/create a BKT from the cache 334 * 335 * Parameters: 336 * mp: mpool cookie 337 * 338 * Returns: 339 * NULL on failure and a pointer to the BKT on success 340 */ 341 static BKT * 342 mpool_bkt(mp) 343 MPOOL *mp; 344 { 345 BKT *b; 346 347 if (mp->curcache < mp->maxcache) 348 goto new; 349 350 /* 351 * If the cache is maxxed out, search the lru list for a buffer we 352 * can flush. If we find one, write it if necessary and take it off 353 * any lists. If we don't find anything we grow the cache anyway. 354 * The cache never shrinks. 355 */ 356 for (b = mp->lru.cprev; b != (BKT *)&mp->lru; b = b->cprev) 357 if (!(b->flags & MPOOL_PINNED)) { 358 if (b->flags & MPOOL_DIRTY && 359 mpool_write(mp, b) == RET_ERROR) 360 return (NULL); 361 rmhash(b); 362 rmchain(b); 363 #ifdef STATISTICS 364 ++mp->pageflush; 365 #endif 366 #ifdef DEBUG 367 { 368 void *spage; 369 spage = b->page; 370 memset(b, 0xff, sizeof(BKT) + mp->pagesize); 371 b->page = spage; 372 } 373 #endif 374 return (b); 375 } 376 377 new: if ((b = malloc(sizeof(BKT) + mp->pagesize)) == NULL) 378 return (NULL); 379 #ifdef STATISTICS 380 ++mp->pagealloc; 381 #endif 382 #ifdef DEBUG 383 memset(b, 0xff, sizeof(BKT) + mp->pagesize); 384 #endif 385 b->page = (char *)b + sizeof(BKT); 386 ++mp->curcache; 387 return (b); 388 } 389 390 /* 391 * MPOOL_WRITE -- sync a page to disk 392 * 393 * Parameters: 394 * mp: mpool cookie 395 * 396 * Returns: 397 * RET_ERROR, RET_SUCCESS 398 */ 399 static int 400 mpool_write(mp, b) 401 MPOOL *mp; 402 BKT *b; 403 { 404 off_t off; 405 406 if (mp->pgout) 407 (mp->pgout)(mp->pgcookie, b->pgno, b->page); 408 409 #ifdef STATISTICS 410 ++mp->pagewrite; 411 #endif 412 off = mp->pagesize * b->pgno; 413 if (lseek(mp->fd, off, SEEK_SET) != off) 414 return (RET_ERROR); 415 if (write(mp->fd, b->page, mp->pagesize) != mp->pagesize) 416 return (RET_ERROR); 417 b->flags &= ~MPOOL_DIRTY; 418 return (RET_SUCCESS); 419 } 420 421 /* 422 * MPOOL_LOOK -- lookup a page 423 * 424 * Parameters: 425 * mp: mpool cookie 426 * pgno: page number 427 * 428 * Returns: 429 * NULL on failure and a pointer to the BKT on success 430 */ 431 static BKT * 432 mpool_look(mp, pgno) 433 MPOOL *mp; 434 pgno_t pgno; 435 { 436 register BKT *b; 437 register BKTHDR *tb; 438 439 /* XXX 440 * If find the buffer, put it first on the hash chain so can 441 * find it again quickly. 442 */ 443 tb = &mp->hashtable[HASHKEY(pgno)]; 444 for (b = tb->hnext; b != (BKT *)tb; b = b->hnext) 445 if (b->pgno == pgno) { 446 #ifdef STATISTICS 447 ++mp->cachehit; 448 #endif 449 return (b); 450 } 451 #ifdef STATISTICS 452 ++mp->cachemiss; 453 #endif 454 return (NULL); 455 } 456 457 #ifdef STATISTICS 458 /* 459 * MPOOL_STAT -- cache statistics 460 * 461 * Parameters: 462 * mp: mpool cookie 463 */ 464 void 465 mpool_stat(mp) 466 MPOOL *mp; 467 { 468 BKT *b; 469 int cnt; 470 char *sep; 471 472 (void)fprintf(stderr, "%lu pages in the file\n", mp->npages); 473 (void)fprintf(stderr, 474 "page size %lu, cacheing %lu pages of %lu page max cache\n", 475 mp->pagesize, mp->curcache, mp->maxcache); 476 (void)fprintf(stderr, "%lu page puts, %lu page gets, %lu page new\n", 477 mp->pageput, mp->pageget, mp->pagenew); 478 (void)fprintf(stderr, "%lu page allocs, %lu page flushes\n", 479 mp->pagealloc, mp->pageflush); 480 if (mp->cachehit + mp->cachemiss) 481 (void)fprintf(stderr, 482 "%.0f%% cache hit rate (%lu hits, %lu misses)\n", 483 ((double)mp->cachehit / (mp->cachehit + mp->cachemiss)) 484 * 100, mp->cachehit, mp->cachemiss); 485 (void)fprintf(stderr, "%lu page reads, %lu page writes\n", 486 mp->pageread, mp->pagewrite); 487 488 sep = ""; 489 cnt = 0; 490 for (b = mp->lru.cnext; b != (BKT *)&mp->lru; b = b->cnext) { 491 (void)fprintf(stderr, "%s%d", sep, b->pgno); 492 if (b->flags & MPOOL_DIRTY) 493 (void)fprintf(stderr, "d"); 494 if (b->flags & MPOOL_PINNED) 495 (void)fprintf(stderr, "P"); 496 if (++cnt == 10) { 497 sep = "\n"; 498 cnt = 0; 499 } else 500 sep = ", "; 501 502 } 503 (void)fprintf(stderr, "\n"); 504 } 505 #endif 506 507 #ifdef DEBUG 508 #if __STDC__ 509 #include <stdarg.h> 510 #else 511 #include <varargs.h> 512 #endif 513 514 static void 515 #if __STDC__ 516 __mpoolerr(const char *fmt, ...) 517 #else 518 __mpoolerr(fmt, va_alist) 519 char *fmt; 520 va_dcl 521 #endif 522 { 523 va_list ap; 524 #if __STDC__ 525 va_start(ap, fmt); 526 #else 527 va_start(ap); 528 #endif 529 (void)vfprintf(stderr, fmt, ap); 530 va_end(ap); 531 (void)fprintf(stderr, "\n"); 532 abort(); 533 /* NOTREACHED */ 534 } 535 #endif 536