1 /* $NetBSD: mpool.c,v 1.5 1995/02/27 13:24:05 cgd Exp $ */ 2 3 /*- 4 * Copyright (c) 1990, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed by the University of 18 * California, Berkeley and its contributors. 19 * 4. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 36 #if defined(LIBC_SCCS) && !defined(lint) 37 #if 0 38 static char sccsid[] = "@(#)mpool.c 8.2 (Berkeley) 2/21/94"; 39 #else 40 static char rcsid[] = "$NetBSD: mpool.c,v 1.5 1995/02/27 13:24:05 cgd Exp $"; 41 #endif 42 #endif /* LIBC_SCCS and not lint */ 43 44 #include <sys/param.h> 45 #include <sys/stat.h> 46 47 #include <errno.h> 48 #include <stdio.h> 49 #include <stdlib.h> 50 #include <string.h> 51 #include <unistd.h> 52 53 #include <db.h> 54 #define __MPOOLINTERFACE_PRIVATE 55 #include "mpool.h" 56 57 static BKT *mpool_bkt __P((MPOOL *)); 58 static BKT *mpool_look __P((MPOOL *, pgno_t)); 59 static int mpool_write __P((MPOOL *, BKT *)); 60 #ifdef DEBUG 61 static void __mpoolerr __P((const char *fmt, ...)); 62 #endif 63 64 /* 65 * MPOOL_OPEN -- initialize a memory pool. 66 * 67 * Parameters: 68 * key: Shared buffer key. 69 * fd: File descriptor. 70 * pagesize: File page size. 71 * maxcache: Max number of cached pages. 72 * 73 * Returns: 74 * MPOOL pointer, NULL on error. 75 */ 76 MPOOL * 77 mpool_open(key, fd, pagesize, maxcache) 78 DBT *key; 79 int fd; 80 pgno_t pagesize, maxcache; 81 { 82 struct stat sb; 83 MPOOL *mp; 84 int entry; 85 86 if (fstat(fd, &sb)) 87 return (NULL); 88 /* XXX 89 * We should only set st_size to 0 for pipes -- 4.4BSD has the fix so 90 * that stat(2) returns true for ISSOCK on pipes. Until then, this is 91 * fairly close. 92 */ 93 if (!S_ISREG(sb.st_mode)) { 94 errno = ESPIPE; 95 return (NULL); 96 } 97 98 if ((mp = (MPOOL *)malloc(sizeof(MPOOL))) == NULL) 99 return (NULL); 100 mp->free.cnext = mp->free.cprev = (BKT *)&mp->free; 101 mp->lru.cnext = mp->lru.cprev = (BKT *)&mp->lru; 102 for (entry = 0; entry < HASHSIZE; ++entry) 103 mp->hashtable[entry].hnext = mp->hashtable[entry].hprev = 104 mp->hashtable[entry].cnext = mp->hashtable[entry].cprev = 105 (BKT *)&mp->hashtable[entry]; 106 mp->curcache = 0; 107 mp->maxcache = maxcache; 108 mp->pagesize = pagesize; 109 mp->npages = sb.st_size / pagesize; 110 mp->fd = fd; 111 mp->pgcookie = NULL; 112 mp->pgin = mp->pgout = NULL; 113 114 #ifdef STATISTICS 115 mp->cachehit = mp->cachemiss = mp->pagealloc = mp->pageflush = 116 mp->pageget = mp->pagenew = mp->pageput = mp->pageread = 117 mp->pagewrite = 0; 118 #endif 119 return (mp); 120 } 121 122 /* 123 * MPOOL_FILTER -- initialize input/output filters. 124 * 125 * Parameters: 126 * pgin: Page in conversion routine. 127 * pgout: Page out conversion routine. 128 * pgcookie: Cookie for page in/out routines. 129 */ 130 void 131 mpool_filter(mp, pgin, pgout, pgcookie) 132 MPOOL *mp; 133 void (*pgin) __P((void *, pgno_t, void *)); 134 void (*pgout) __P((void *, pgno_t, void *)); 135 void *pgcookie; 136 { 137 mp->pgin = pgin; 138 mp->pgout = pgout; 139 mp->pgcookie = pgcookie; 140 } 141 142 /* 143 * MPOOL_NEW -- get a new page 144 * 145 * Parameters: 146 * mp: mpool cookie 147 * pgnoadddr: place to store new page number 148 * Returns: 149 * RET_ERROR, RET_SUCCESS 150 */ 151 void * 152 mpool_new(mp, pgnoaddr) 153 MPOOL *mp; 154 pgno_t *pgnoaddr; 155 { 156 BKT *b; 157 BKTHDR *hp; 158 159 #ifdef STATISTICS 160 ++mp->pagenew; 161 #endif 162 /* 163 * Get a BKT from the cache. Assign a new page number, attach it to 164 * the hash and lru chains and return. 165 */ 166 if ((b = mpool_bkt(mp)) == NULL) 167 return (NULL); 168 *pgnoaddr = b->pgno = mp->npages++; 169 b->flags = MPOOL_PINNED; 170 inshash(b, b->pgno); 171 inschain(b, &mp->lru); 172 return (b->page); 173 } 174 175 /* 176 * MPOOL_GET -- get a page from the pool 177 * 178 * Parameters: 179 * mp: mpool cookie 180 * pgno: page number 181 * flags: not used 182 * 183 * Returns: 184 * RET_ERROR, RET_SUCCESS 185 */ 186 void * 187 mpool_get(mp, pgno, flags) 188 MPOOL *mp; 189 pgno_t pgno; 190 u_int flags; /* XXX not used? */ 191 { 192 BKT *b; 193 BKTHDR *hp; 194 off_t off; 195 int nr; 196 197 /* 198 * If asking for a specific page that is already in the cache, find 199 * it and return it. 200 */ 201 if (b = mpool_look(mp, pgno)) { 202 #ifdef STATISTICS 203 ++mp->pageget; 204 #endif 205 #ifdef DEBUG 206 if (b->flags & MPOOL_PINNED) 207 __mpoolerr("mpool_get: page %d already pinned", 208 b->pgno); 209 #endif 210 rmchain(b); 211 inschain(b, &mp->lru); 212 b->flags |= MPOOL_PINNED; 213 return (b->page); 214 } 215 216 /* Not allowed to retrieve a non-existent page. */ 217 if (pgno >= mp->npages) { 218 errno = EINVAL; 219 return (NULL); 220 } 221 222 /* Get a page from the cache. */ 223 if ((b = mpool_bkt(mp)) == NULL) 224 return (NULL); 225 b->pgno = pgno; 226 b->flags = MPOOL_PINNED; 227 228 #ifdef STATISTICS 229 ++mp->pageread; 230 #endif 231 /* Read in the contents. */ 232 off = mp->pagesize * pgno; 233 if (lseek(mp->fd, off, SEEK_SET) != off) 234 return (NULL); 235 if ((nr = read(mp->fd, b->page, mp->pagesize)) != mp->pagesize) { 236 if (nr >= 0) 237 errno = EFTYPE; 238 return (NULL); 239 } 240 if (mp->pgin) 241 (mp->pgin)(mp->pgcookie, b->pgno, b->page); 242 243 inshash(b, b->pgno); 244 inschain(b, &mp->lru); 245 #ifdef STATISTICS 246 ++mp->pageget; 247 #endif 248 return (b->page); 249 } 250 251 /* 252 * MPOOL_PUT -- return a page to the pool 253 * 254 * Parameters: 255 * mp: mpool cookie 256 * page: page pointer 257 * pgno: page number 258 * 259 * Returns: 260 * RET_ERROR, RET_SUCCESS 261 */ 262 int 263 mpool_put(mp, page, flags) 264 MPOOL *mp; 265 void *page; 266 u_int flags; 267 { 268 BKT *baddr; 269 #ifdef DEBUG 270 BKT *b; 271 #endif 272 273 #ifdef STATISTICS 274 ++mp->pageput; 275 #endif 276 baddr = (BKT *)((char *)page - sizeof(BKT)); 277 #ifdef DEBUG 278 if (!(baddr->flags & MPOOL_PINNED)) 279 __mpoolerr("mpool_put: page %d not pinned", b->pgno); 280 for (b = mp->lru.cnext; b != (BKT *)&mp->lru; b = b->cnext) { 281 if (b == (BKT *)&mp->lru) 282 __mpoolerr("mpool_put: %0x: bad address", baddr); 283 if (b == baddr) 284 break; 285 } 286 #endif 287 baddr->flags &= ~MPOOL_PINNED; 288 baddr->flags |= flags & MPOOL_DIRTY; 289 return (RET_SUCCESS); 290 } 291 292 /* 293 * MPOOL_CLOSE -- close the buffer pool 294 * 295 * Parameters: 296 * mp: mpool cookie 297 * 298 * Returns: 299 * RET_ERROR, RET_SUCCESS 300 */ 301 int 302 mpool_close(mp) 303 MPOOL *mp; 304 { 305 BKT *b, *next; 306 307 /* Free up any space allocated to the lru pages. */ 308 for (b = mp->lru.cprev; b != (BKT *)&mp->lru; b = next) { 309 next = b->cprev; 310 free(b); 311 } 312 free(mp); 313 return (RET_SUCCESS); 314 } 315 316 /* 317 * MPOOL_SYNC -- sync the file to disk. 318 * 319 * Parameters: 320 * mp: mpool cookie 321 * 322 * Returns: 323 * RET_ERROR, RET_SUCCESS 324 */ 325 int 326 mpool_sync(mp) 327 MPOOL *mp; 328 { 329 BKT *b; 330 331 for (b = mp->lru.cprev; b != (BKT *)&mp->lru; b = b->cprev) 332 if (b->flags & MPOOL_DIRTY && mpool_write(mp, b) == RET_ERROR) 333 return (RET_ERROR); 334 return (fsync(mp->fd) ? RET_ERROR : RET_SUCCESS); 335 } 336 337 /* 338 * MPOOL_BKT -- get/create a BKT from the cache 339 * 340 * Parameters: 341 * mp: mpool cookie 342 * 343 * Returns: 344 * NULL on failure and a pointer to the BKT on success 345 */ 346 static BKT * 347 mpool_bkt(mp) 348 MPOOL *mp; 349 { 350 BKT *b; 351 352 if (mp->curcache < mp->maxcache) 353 goto new; 354 355 /* 356 * If the cache is maxxed out, search the lru list for a buffer we 357 * can flush. If we find one, write it if necessary and take it off 358 * any lists. If we don't find anything we grow the cache anyway. 359 * The cache never shrinks. 360 */ 361 for (b = mp->lru.cprev; b != (BKT *)&mp->lru; b = b->cprev) 362 if (!(b->flags & MPOOL_PINNED)) { 363 if (b->flags & MPOOL_DIRTY && 364 mpool_write(mp, b) == RET_ERROR) 365 return (NULL); 366 rmhash(b); 367 rmchain(b); 368 #ifdef STATISTICS 369 ++mp->pageflush; 370 #endif 371 #ifdef DEBUG 372 { 373 void *spage; 374 spage = b->page; 375 memset(b, 0xff, sizeof(BKT) + mp->pagesize); 376 b->page = spage; 377 } 378 #endif 379 return (b); 380 } 381 382 new: if ((b = (BKT *)malloc(sizeof(BKT) + mp->pagesize)) == NULL) 383 return (NULL); 384 #ifdef STATISTICS 385 ++mp->pagealloc; 386 #endif 387 #ifdef DEBUG 388 memset(b, 0xff, sizeof(BKT) + mp->pagesize); 389 #endif 390 b->page = (char *)b + sizeof(BKT); 391 ++mp->curcache; 392 return (b); 393 } 394 395 /* 396 * MPOOL_WRITE -- sync a page to disk 397 * 398 * Parameters: 399 * mp: mpool cookie 400 * 401 * Returns: 402 * RET_ERROR, RET_SUCCESS 403 */ 404 static int 405 mpool_write(mp, b) 406 MPOOL *mp; 407 BKT *b; 408 { 409 off_t off; 410 411 if (mp->pgout) 412 (mp->pgout)(mp->pgcookie, b->pgno, b->page); 413 414 #ifdef STATISTICS 415 ++mp->pagewrite; 416 #endif 417 off = mp->pagesize * b->pgno; 418 if (lseek(mp->fd, off, SEEK_SET) != off) 419 return (RET_ERROR); 420 if (write(mp->fd, b->page, mp->pagesize) != mp->pagesize) 421 return (RET_ERROR); 422 b->flags &= ~MPOOL_DIRTY; 423 return (RET_SUCCESS); 424 } 425 426 /* 427 * MPOOL_LOOK -- lookup a page 428 * 429 * Parameters: 430 * mp: mpool cookie 431 * pgno: page number 432 * 433 * Returns: 434 * NULL on failure and a pointer to the BKT on success 435 */ 436 static BKT * 437 mpool_look(mp, pgno) 438 MPOOL *mp; 439 pgno_t pgno; 440 { 441 register BKT *b; 442 register BKTHDR *tb; 443 444 /* XXX 445 * If find the buffer, put it first on the hash chain so can 446 * find it again quickly. 447 */ 448 tb = &mp->hashtable[HASHKEY(pgno)]; 449 for (b = tb->hnext; b != (BKT *)tb; b = b->hnext) 450 if (b->pgno == pgno) { 451 #ifdef STATISTICS 452 ++mp->cachehit; 453 #endif 454 return (b); 455 } 456 #ifdef STATISTICS 457 ++mp->cachemiss; 458 #endif 459 return (NULL); 460 } 461 462 #ifdef STATISTICS 463 /* 464 * MPOOL_STAT -- cache statistics 465 * 466 * Parameters: 467 * mp: mpool cookie 468 */ 469 void 470 mpool_stat(mp) 471 MPOOL *mp; 472 { 473 BKT *b; 474 int cnt; 475 char *sep; 476 477 (void)fprintf(stderr, "%lu pages in the file\n", mp->npages); 478 (void)fprintf(stderr, 479 "page size %lu, cacheing %lu pages of %lu page max cache\n", 480 mp->pagesize, mp->curcache, mp->maxcache); 481 (void)fprintf(stderr, "%lu page puts, %lu page gets, %lu page new\n", 482 mp->pageput, mp->pageget, mp->pagenew); 483 (void)fprintf(stderr, "%lu page allocs, %lu page flushes\n", 484 mp->pagealloc, mp->pageflush); 485 if (mp->cachehit + mp->cachemiss) 486 (void)fprintf(stderr, 487 "%.0f%% cache hit rate (%lu hits, %lu misses)\n", 488 ((double)mp->cachehit / (mp->cachehit + mp->cachemiss)) 489 * 100, mp->cachehit, mp->cachemiss); 490 (void)fprintf(stderr, "%lu page reads, %lu page writes\n", 491 mp->pageread, mp->pagewrite); 492 493 sep = ""; 494 cnt = 0; 495 for (b = mp->lru.cnext; b != (BKT *)&mp->lru; b = b->cnext) { 496 (void)fprintf(stderr, "%s%d", sep, b->pgno); 497 if (b->flags & MPOOL_DIRTY) 498 (void)fprintf(stderr, "d"); 499 if (b->flags & MPOOL_PINNED) 500 (void)fprintf(stderr, "P"); 501 if (++cnt == 10) { 502 sep = "\n"; 503 cnt = 0; 504 } else 505 sep = ", "; 506 507 } 508 (void)fprintf(stderr, "\n"); 509 } 510 #endif 511 512 #ifdef DEBUG 513 #if __STDC__ 514 #include <stdarg.h> 515 #else 516 #include <varargs.h> 517 #endif 518 519 static void 520 #if __STDC__ 521 __mpoolerr(const char *fmt, ...) 522 #else 523 __mpoolerr(fmt, va_alist) 524 char *fmt; 525 va_dcl 526 #endif 527 { 528 va_list ap; 529 #if __STDC__ 530 va_start(ap, fmt); 531 #else 532 va_start(ap); 533 #endif 534 (void)vfprintf(stderr, fmt, ap); 535 va_end(ap); 536 (void)fprintf(stderr, "\n"); 537 abort(); 538 /* NOTREACHED */ 539 } 540 #endif 541