1*50982Sbostic /*- 2*50982Sbostic * Copyright (c) 1990 The Regents of the University of California. 3*50982Sbostic * All rights reserved. 4*50982Sbostic * 5*50982Sbostic * %sccs.include.redist.c% 6*50982Sbostic */ 7*50982Sbostic 8*50982Sbostic #if defined(LIBC_SCCS) && !defined(lint) 9*50982Sbostic static char sccsid[] = "@(#)mpool.c 5.1 (Berkeley) 09/04/91"; 10*50982Sbostic #endif /* LIBC_SCCS and not lint */ 11*50982Sbostic 12*50982Sbostic #include <sys/param.h> 13*50982Sbostic #include <sys/stat.h> 14*50982Sbostic #include <errno.h> 15*50982Sbostic #include <db.h> 16*50982Sbostic #include <unistd.h> 17*50982Sbostic #include <stdio.h> 18*50982Sbostic #include <stdlib.h> 19*50982Sbostic #include <string.h> 20*50982Sbostic #define __MPOOLINTERFACE_PRIVATE 21*50982Sbostic #include "mpool.h" 22*50982Sbostic 23*50982Sbostic static BKT *mpool_bkt __P((MPOOL *)); 24*50982Sbostic static BKT *mpool_look __P((MPOOL *, pgno_t)); 25*50982Sbostic static int mpool_write __P((MPOOL *, BKT *)); 26*50982Sbostic #ifdef DEBUG 27*50982Sbostic static void err __P((const char *fmt, ...)); 28*50982Sbostic #endif 29*50982Sbostic 30*50982Sbostic /* 31*50982Sbostic * MPOOL_OPEN -- initialize a memory pool. 32*50982Sbostic * 33*50982Sbostic * Parameters: 34*50982Sbostic * key: Shared buffer key. 35*50982Sbostic * fd: File descriptor. 36*50982Sbostic * pagesize: File page size. 37*50982Sbostic * maxcache: Max number of cached pages. 38*50982Sbostic * 39*50982Sbostic * Returns: 40*50982Sbostic * MPOOL pointer, NULL on error. 41*50982Sbostic */ 42*50982Sbostic MPOOL * 43*50982Sbostic mpool_open(key, fd, pagesize, maxcache) 44*50982Sbostic DBT *key; 45*50982Sbostic int fd; 46*50982Sbostic pgno_t pagesize, maxcache; 47*50982Sbostic { 48*50982Sbostic struct stat sb; 49*50982Sbostic MPOOL *mp; 50*50982Sbostic int entry; 51*50982Sbostic 52*50982Sbostic if (fstat(fd, &sb)) 53*50982Sbostic return (NULL); 54*50982Sbostic /* XXX 55*50982Sbostic * We should only set st_size to 0 for pipes -- 4.4BSD has the fix so 56*50982Sbostic * that stat(2) returns true for ISSOCK on pipes. Until then, this is 57*50982Sbostic * fairly close. 58*50982Sbostic */ 59*50982Sbostic if (!S_ISREG(sb.st_mode)) { 60*50982Sbostic errno = ESPIPE; 61*50982Sbostic return (NULL); 62*50982Sbostic } 63*50982Sbostic 64*50982Sbostic if ((mp = malloc(sizeof(MPOOL))) == NULL) 65*50982Sbostic return (NULL); 66*50982Sbostic mp->free.cnext = mp->free.cprev = (BKT *)&mp->free; 67*50982Sbostic mp->lru.cnext = mp->lru.cprev = (BKT *)&mp->lru; 68*50982Sbostic for (entry = 0; entry < HASHSIZE; ++entry) 69*50982Sbostic mp->hashtable[entry].hnext = mp->hashtable[entry].hprev = 70*50982Sbostic mp->hashtable[entry].cnext = mp->hashtable[entry].cprev = 71*50982Sbostic (BKT *)&mp->hashtable[entry]; 72*50982Sbostic mp->curcache = 0; 73*50982Sbostic mp->maxcache = maxcache; 74*50982Sbostic mp->pagesize = pagesize; 75*50982Sbostic mp->npages = sb.st_size / pagesize; 76*50982Sbostic mp->fd = fd; 77*50982Sbostic 78*50982Sbostic #ifdef STATISTICS 79*50982Sbostic mp->cachehit = mp->cachemiss = mp->pagealloc = mp->pageflush = 80*50982Sbostic mp->pageget = mp->pagenew = mp->pageput = mp->pageread = 81*50982Sbostic mp->pagewrite = 0; 82*50982Sbostic #endif 83*50982Sbostic return (mp); 84*50982Sbostic } 85*50982Sbostic 86*50982Sbostic /* 87*50982Sbostic * MPOOL_FILTER -- initialize input/output filters. 88*50982Sbostic * 89*50982Sbostic * Parameters: 90*50982Sbostic * pgin: Page in conversion routine. 91*50982Sbostic * pgout: Page out conversion routine. 92*50982Sbostic * pgcookie: Cookie for page in/out routines. 93*50982Sbostic */ 94*50982Sbostic void 95*50982Sbostic mpool_filter(mp, pgin, pgout, pgcookie) 96*50982Sbostic MPOOL *mp; 97*50982Sbostic void (*pgin) __P((void *, pgno_t, void *)); 98*50982Sbostic void (*pgout) __P((void *, pgno_t, void *)); 99*50982Sbostic void *pgcookie; 100*50982Sbostic { 101*50982Sbostic mp->pgin = pgin; 102*50982Sbostic mp->pgout = pgout; 103*50982Sbostic mp->pgcookie = pgcookie; 104*50982Sbostic } 105*50982Sbostic 106*50982Sbostic /* 107*50982Sbostic * MPOOL_NEW -- get a new page 108*50982Sbostic * 109*50982Sbostic * Parameters: 110*50982Sbostic * mp: mpool cookie 111*50982Sbostic * pgnoadddr: place to store new page number 112*50982Sbostic * Returns: 113*50982Sbostic * RET_ERROR, RET_SUCCESS 114*50982Sbostic */ 115*50982Sbostic void * 116*50982Sbostic mpool_new(mp, pgnoaddr) 117*50982Sbostic MPOOL *mp; 118*50982Sbostic pgno_t *pgnoaddr; 119*50982Sbostic { 120*50982Sbostic BKT *b; 121*50982Sbostic BKTHDR *hp; 122*50982Sbostic 123*50982Sbostic #ifdef STATISTICS 124*50982Sbostic ++mp->pagenew; 125*50982Sbostic #endif 126*50982Sbostic /* 127*50982Sbostic * Get a BKT from the cache. Assign a new page number, attach it to 128*50982Sbostic * the hash and lru chains and return. 129*50982Sbostic */ 130*50982Sbostic if ((b = mpool_bkt(mp)) == NULL) 131*50982Sbostic return (NULL); 132*50982Sbostic *pgnoaddr = b->pgno = mp->npages++; 133*50982Sbostic b->flags = MPOOL_PINNED; 134*50982Sbostic inshash(b, b->pgno); 135*50982Sbostic inschain(b, &mp->lru); 136*50982Sbostic return (b->page); 137*50982Sbostic } 138*50982Sbostic 139*50982Sbostic /* 140*50982Sbostic * MPOOL_GET -- get a page from the pool 141*50982Sbostic * 142*50982Sbostic * Parameters: 143*50982Sbostic * mp: mpool cookie 144*50982Sbostic * pgno: page number 145*50982Sbostic * flags: not used 146*50982Sbostic * 147*50982Sbostic * Returns: 148*50982Sbostic * RET_ERROR, RET_SUCCESS 149*50982Sbostic */ 150*50982Sbostic void * 151*50982Sbostic mpool_get(mp, pgno, flags) 152*50982Sbostic MPOOL *mp; 153*50982Sbostic pgno_t pgno; 154*50982Sbostic u_int flags; /* XXX not used? */ 155*50982Sbostic { 156*50982Sbostic BKT *b; 157*50982Sbostic BKTHDR *hp; 158*50982Sbostic off_t off; 159*50982Sbostic int nr; 160*50982Sbostic 161*50982Sbostic /* 162*50982Sbostic * If asking for a specific page that is already in the cache, find 163*50982Sbostic * it and return it. 164*50982Sbostic */ 165*50982Sbostic if (b = mpool_look(mp, pgno)) { 166*50982Sbostic #ifdef STATISTICS 167*50982Sbostic ++mp->pageget; 168*50982Sbostic #endif 169*50982Sbostic #ifdef DEBUG 170*50982Sbostic if (b->flags & MPOOL_PINNED) 171*50982Sbostic err("mpool_get: page %d already pinned", b->pgno); 172*50982Sbostic #endif 173*50982Sbostic rmchain(b); 174*50982Sbostic inschain(b, &mp->lru); 175*50982Sbostic b->flags |= MPOOL_PINNED; 176*50982Sbostic return (b->page); 177*50982Sbostic } 178*50982Sbostic 179*50982Sbostic /* Not allowed to retrieve a non-existent page. */ 180*50982Sbostic if (pgno >= mp->npages) { 181*50982Sbostic errno = EINVAL; 182*50982Sbostic return (NULL); 183*50982Sbostic } 184*50982Sbostic 185*50982Sbostic /* Get a page from the cache. */ 186*50982Sbostic if ((b = mpool_bkt(mp)) == NULL) 187*50982Sbostic return (NULL); 188*50982Sbostic b->pgno = pgno; 189*50982Sbostic b->flags = MPOOL_PINNED; 190*50982Sbostic 191*50982Sbostic #ifdef STATISTICS 192*50982Sbostic ++mp->pageread; 193*50982Sbostic #endif 194*50982Sbostic /* Read in the contents. */ 195*50982Sbostic off = mp->pagesize * pgno; 196*50982Sbostic if (lseek(mp->fd, off, SEEK_SET) != off) 197*50982Sbostic return (NULL); 198*50982Sbostic if ((nr = read(mp->fd, b->page, mp->pagesize)) != mp->pagesize) { 199*50982Sbostic if (nr >= 0) 200*50982Sbostic errno = EFTYPE; 201*50982Sbostic return (NULL); 202*50982Sbostic } 203*50982Sbostic if (mp->pgin) 204*50982Sbostic (mp->pgin)(mp->pgcookie, b->pgno, b->page); 205*50982Sbostic 206*50982Sbostic inshash(b, b->pgno); 207*50982Sbostic inschain(b, &mp->lru); 208*50982Sbostic #ifdef STATISTICS 209*50982Sbostic ++mp->pageget; 210*50982Sbostic #endif 211*50982Sbostic return (b->page); 212*50982Sbostic } 213*50982Sbostic 214*50982Sbostic /* 215*50982Sbostic * MPOOL_PUT -- return a page to the pool 216*50982Sbostic * 217*50982Sbostic * Parameters: 218*50982Sbostic * mp: mpool cookie 219*50982Sbostic * page: page pointer 220*50982Sbostic * pgno: page number 221*50982Sbostic * 222*50982Sbostic * Returns: 223*50982Sbostic * RET_ERROR, RET_SUCCESS 224*50982Sbostic */ 225*50982Sbostic int 226*50982Sbostic mpool_put(mp, page, flags) 227*50982Sbostic MPOOL *mp; 228*50982Sbostic void *page; 229*50982Sbostic u_int flags; 230*50982Sbostic { 231*50982Sbostic BKT *baddr; 232*50982Sbostic #ifdef DEBUG 233*50982Sbostic BKT *b; 234*50982Sbostic #endif 235*50982Sbostic 236*50982Sbostic #ifdef STATISTICS 237*50982Sbostic ++mp->pageput; 238*50982Sbostic #endif 239*50982Sbostic baddr = page - sizeof(BKT); 240*50982Sbostic #ifdef DEBUG 241*50982Sbostic if (!(baddr->flags & MPOOL_PINNED)) 242*50982Sbostic err("mpool_put: page %d not pinned", b->pgno); 243*50982Sbostic for (b = mp->lru.cnext; b != (BKT *)&mp->lru; b = b->cnext) { 244*50982Sbostic if (b == (BKT *)&mp->lru) 245*50982Sbostic err("mpool_put: %0x: bad address", baddr); 246*50982Sbostic if (b == baddr) 247*50982Sbostic break; 248*50982Sbostic } 249*50982Sbostic #endif 250*50982Sbostic baddr->flags &= ~MPOOL_PINNED; 251*50982Sbostic baddr->flags |= flags & MPOOL_DIRTY; 252*50982Sbostic return (RET_SUCCESS); 253*50982Sbostic } 254*50982Sbostic 255*50982Sbostic /* 256*50982Sbostic * MPOOL_CLOSE -- close the buffer pool 257*50982Sbostic * 258*50982Sbostic * Parameters: 259*50982Sbostic * mp: mpool cookie 260*50982Sbostic * 261*50982Sbostic * Returns: 262*50982Sbostic * RET_ERROR, RET_SUCCESS 263*50982Sbostic */ 264*50982Sbostic int 265*50982Sbostic mpool_close(mp) 266*50982Sbostic MPOOL *mp; 267*50982Sbostic { 268*50982Sbostic BKT *b, *next; 269*50982Sbostic 270*50982Sbostic /* Free up any space allocated to the lru pages. */ 271*50982Sbostic for (b = mp->lru.cprev; b != (BKT *)&mp->lru; b = next) { 272*50982Sbostic next = b->cprev; 273*50982Sbostic free(b->page); 274*50982Sbostic free(b); 275*50982Sbostic } 276*50982Sbostic return (RET_SUCCESS); 277*50982Sbostic } 278*50982Sbostic 279*50982Sbostic /* 280*50982Sbostic * MPOOL_SYNC -- sync the file to disk. 281*50982Sbostic * 282*50982Sbostic * Parameters: 283*50982Sbostic * mp: mpool cookie 284*50982Sbostic * 285*50982Sbostic * Returns: 286*50982Sbostic * RET_ERROR, RET_SUCCESS 287*50982Sbostic */ 288*50982Sbostic int 289*50982Sbostic mpool_sync(mp) 290*50982Sbostic MPOOL *mp; 291*50982Sbostic { 292*50982Sbostic BKT *b; 293*50982Sbostic 294*50982Sbostic for (b = mp->lru.cprev; b != (BKT *)&mp->lru; b = b->cprev) 295*50982Sbostic if (b->flags & MPOOL_DIRTY && mpool_write(mp, b) == RET_ERROR) 296*50982Sbostic return (RET_ERROR); 297*50982Sbostic return (fsync(mp->fd) ? RET_ERROR : RET_SUCCESS); 298*50982Sbostic } 299*50982Sbostic 300*50982Sbostic /* 301*50982Sbostic * MPOOL_BKT -- get/create a BKT from the cache 302*50982Sbostic * 303*50982Sbostic * Parameters: 304*50982Sbostic * mp: mpool cookie 305*50982Sbostic * 306*50982Sbostic * Returns: 307*50982Sbostic * NULL on failure and a pointer to the BKT on success 308*50982Sbostic */ 309*50982Sbostic static BKT * 310*50982Sbostic mpool_bkt(mp) 311*50982Sbostic MPOOL *mp; 312*50982Sbostic { 313*50982Sbostic BKT *b; 314*50982Sbostic 315*50982Sbostic if (mp->curcache < mp->maxcache) 316*50982Sbostic goto new; 317*50982Sbostic 318*50982Sbostic /* 319*50982Sbostic * If the cache is maxxed out, search the lru list for a buffer we 320*50982Sbostic * can flush. If we find one, write it if necessary and take it off 321*50982Sbostic * any lists. If we don't find anything we grow the cache anyway. 322*50982Sbostic * The cache never shrinks. 323*50982Sbostic */ 324*50982Sbostic for (b = mp->lru.cprev; b != (BKT *)&mp->lru; b = b->cprev) 325*50982Sbostic if (!(b->flags & MPOOL_PINNED)) { 326*50982Sbostic if (b->flags & MPOOL_DIRTY && 327*50982Sbostic mpool_write(mp, b) == RET_ERROR) 328*50982Sbostic return (NULL); 329*50982Sbostic rmhash(b); 330*50982Sbostic rmchain(b); 331*50982Sbostic #ifdef STATISTICS 332*50982Sbostic ++mp->pageflush; 333*50982Sbostic #endif 334*50982Sbostic #ifdef DEBUG 335*50982Sbostic { 336*50982Sbostic void *spage; 337*50982Sbostic spage = b->page; 338*50982Sbostic memset(b, 0xff, sizeof(BKT) + mp->pagesize); 339*50982Sbostic b->page = spage; 340*50982Sbostic } 341*50982Sbostic #endif 342*50982Sbostic return (b); 343*50982Sbostic } 344*50982Sbostic 345*50982Sbostic new: if ((b = malloc(sizeof(BKT) + mp->pagesize)) == NULL) 346*50982Sbostic return (NULL); 347*50982Sbostic #ifdef STATISTICS 348*50982Sbostic ++mp->pagealloc; 349*50982Sbostic #endif 350*50982Sbostic #ifdef DEBUG 351*50982Sbostic memset(b, 0xff, sizeof(BKT) + mp->pagesize); 352*50982Sbostic #endif 353*50982Sbostic b->page = (char *)b + sizeof(BKT); 354*50982Sbostic ++mp->curcache; 355*50982Sbostic return (b); 356*50982Sbostic } 357*50982Sbostic 358*50982Sbostic /* 359*50982Sbostic * MPOOL_WRITE -- sync a page to disk 360*50982Sbostic * 361*50982Sbostic * Parameters: 362*50982Sbostic * mp: mpool cookie 363*50982Sbostic * 364*50982Sbostic * Returns: 365*50982Sbostic * RET_ERROR, RET_SUCCESS 366*50982Sbostic */ 367*50982Sbostic static int 368*50982Sbostic mpool_write(mp, b) 369*50982Sbostic MPOOL *mp; 370*50982Sbostic BKT *b; 371*50982Sbostic { 372*50982Sbostic off_t off; 373*50982Sbostic 374*50982Sbostic if (mp->pgout) 375*50982Sbostic (mp->pgout)(mp->pgcookie, b->pgno, b->page); 376*50982Sbostic 377*50982Sbostic #ifdef STATISTICS 378*50982Sbostic ++mp->pagewrite; 379*50982Sbostic #endif 380*50982Sbostic off = mp->pagesize * b->pgno; 381*50982Sbostic if (lseek(mp->fd, off, SEEK_SET) != off) 382*50982Sbostic return (RET_ERROR); 383*50982Sbostic if (write(mp->fd, b->page, mp->pagesize) != mp->pagesize) 384*50982Sbostic return (RET_ERROR); 385*50982Sbostic b->flags &= ~MPOOL_DIRTY; 386*50982Sbostic return (RET_SUCCESS); 387*50982Sbostic } 388*50982Sbostic 389*50982Sbostic /* 390*50982Sbostic * MPOOL_LOOK -- lookup a page 391*50982Sbostic * 392*50982Sbostic * Parameters: 393*50982Sbostic * mp: mpool cookie 394*50982Sbostic * pgno: page number 395*50982Sbostic * 396*50982Sbostic * Returns: 397*50982Sbostic * NULL on failure and a pointer to the BKT on success 398*50982Sbostic */ 399*50982Sbostic static BKT * 400*50982Sbostic mpool_look(mp, pgno) 401*50982Sbostic MPOOL *mp; 402*50982Sbostic pgno_t pgno; 403*50982Sbostic { 404*50982Sbostic register BKT *b; 405*50982Sbostic register BKTHDR *tb; 406*50982Sbostic 407*50982Sbostic /* XXX 408*50982Sbostic * If find the buffer, put it first on the hash chain so can 409*50982Sbostic * find it again quickly. 410*50982Sbostic */ 411*50982Sbostic tb = &mp->hashtable[HASHKEY(pgno)]; 412*50982Sbostic for (b = tb->hnext; b != (BKT *)tb; b = b->hnext) 413*50982Sbostic if (b->pgno == pgno) { 414*50982Sbostic #ifdef STATISTICS 415*50982Sbostic ++mp->cachehit; 416*50982Sbostic #endif 417*50982Sbostic return (b); 418*50982Sbostic } 419*50982Sbostic #ifdef STATISTICS 420*50982Sbostic ++mp->cachemiss; 421*50982Sbostic #endif 422*50982Sbostic return (NULL); 423*50982Sbostic } 424*50982Sbostic 425*50982Sbostic #ifdef STATISTICS 426*50982Sbostic /* 427*50982Sbostic * MPOOL_STAT -- cache statistics 428*50982Sbostic * 429*50982Sbostic * Parameters: 430*50982Sbostic * mp: mpool cookie 431*50982Sbostic */ 432*50982Sbostic void 433*50982Sbostic mpool_stat(mp) 434*50982Sbostic MPOOL *mp; 435*50982Sbostic { 436*50982Sbostic BKT *b; 437*50982Sbostic int cnt; 438*50982Sbostic char *sep; 439*50982Sbostic 440*50982Sbostic (void)fprintf(stderr, "%lu pages in the file\n", mp->npages); 441*50982Sbostic (void)fprintf(stderr, 442*50982Sbostic "page size %lu, cacheing %lu pages of %lu page max cache\n", 443*50982Sbostic mp->pagesize, mp->curcache, mp->maxcache); 444*50982Sbostic (void)fprintf(stderr, "%lu page puts, %lu page gets, %lu page new\n", 445*50982Sbostic mp->pageput, mp->pageget, mp->pagenew); 446*50982Sbostic (void)fprintf(stderr, "%lu page allocs, %lu page flushes\n", 447*50982Sbostic mp->pagealloc, mp->pageflush); 448*50982Sbostic if (mp->cachehit + mp->cachemiss) 449*50982Sbostic (void)fprintf(stderr, 450*50982Sbostic "%.0f%% cache hit rate (%lu hits, %lu misses)\n", 451*50982Sbostic ((double)mp->cachehit / (mp->cachehit + mp->cachemiss)) 452*50982Sbostic * 100, mp->cachehit, mp->cachemiss); 453*50982Sbostic (void)fprintf(stderr, "%lu page reads, %lu page writes\n", 454*50982Sbostic mp->pageread, mp->pagewrite); 455*50982Sbostic 456*50982Sbostic sep = ""; 457*50982Sbostic cnt = 0; 458*50982Sbostic for (b = mp->lru.cnext; b != (BKT *)&mp->lru; b = b->cnext) { 459*50982Sbostic (void)fprintf(stderr, "%s%d", sep, b->pgno); 460*50982Sbostic if (b->flags & MPOOL_DIRTY) 461*50982Sbostic (void)fprintf(stderr, "d"); 462*50982Sbostic if (b->flags & MPOOL_PINNED) 463*50982Sbostic (void)fprintf(stderr, "P"); 464*50982Sbostic if (++cnt == 10) { 465*50982Sbostic sep = "\n"; 466*50982Sbostic cnt = 0; 467*50982Sbostic } else 468*50982Sbostic sep = ", "; 469*50982Sbostic 470*50982Sbostic } 471*50982Sbostic (void)fprintf(stderr, "\n"); 472*50982Sbostic } 473*50982Sbostic #endif 474*50982Sbostic 475*50982Sbostic #ifdef DEBUG 476*50982Sbostic #if __STDC__ 477*50982Sbostic #include <stdarg.h> 478*50982Sbostic #else 479*50982Sbostic #include <varargs.h> 480*50982Sbostic #endif 481*50982Sbostic 482*50982Sbostic static void 483*50982Sbostic #if __STDC__ 484*50982Sbostic err(const char *fmt, ...) 485*50982Sbostic #else 486*50982Sbostic err(fmt, va_alist) 487*50982Sbostic char *fmt; 488*50982Sbostic va_dcl 489*50982Sbostic #endif 490*50982Sbostic { 491*50982Sbostic va_list ap; 492*50982Sbostic #if __STDC__ 493*50982Sbostic va_start(ap, fmt); 494*50982Sbostic #else 495*50982Sbostic va_start(ap); 496*50982Sbostic #endif 497*50982Sbostic (void)vfprintf(stderr, fmt, ap); 498*50982Sbostic va_end(ap); 499*50982Sbostic (void)fprintf(stderr, "\n"); 500*50982Sbostic abort(); 501*50982Sbostic /* NOTREACHED */ 502*50982Sbostic } 503*50982Sbostic #endif 504