150982Sbostic /*- 2*62485Sbostic * Copyright (c) 1990, 1993 3*62485Sbostic * The Regents of the University of California. All rights reserved. 450982Sbostic * 550982Sbostic * %sccs.include.redist.c% 650982Sbostic */ 750982Sbostic 850982Sbostic #if defined(LIBC_SCCS) && !defined(lint) 9*62485Sbostic static char sccsid[] = "@(#)mpool.c 8.1 (Berkeley) 06/06/93"; 1050982Sbostic #endif /* LIBC_SCCS and not lint */ 1150982Sbostic 1250982Sbostic #include <sys/param.h> 1350982Sbostic #include <sys/stat.h> 1457933Sbostic 1550982Sbostic #include <errno.h> 1650982Sbostic #include <stdio.h> 1750982Sbostic #include <stdlib.h> 1850982Sbostic #include <string.h> 1957933Sbostic #include <unistd.h> 2057933Sbostic 2157933Sbostic #include <db.h> 2250982Sbostic #define __MPOOLINTERFACE_PRIVATE 2350982Sbostic #include "mpool.h" 2450982Sbostic 2550982Sbostic static BKT *mpool_bkt __P((MPOOL *)); 2650982Sbostic static BKT *mpool_look __P((MPOOL *, pgno_t)); 2750982Sbostic static int mpool_write __P((MPOOL *, BKT *)); 2850982Sbostic #ifdef DEBUG 2962484Sbostic static void __mpoolerr __P((const char *fmt, ...)); 3050982Sbostic #endif 3150982Sbostic 3250982Sbostic /* 3350982Sbostic * MPOOL_OPEN -- initialize a memory pool. 3450982Sbostic * 3550982Sbostic * Parameters: 3650982Sbostic * key: Shared buffer key. 3750982Sbostic * fd: File descriptor. 3850982Sbostic * pagesize: File page size. 3950982Sbostic * maxcache: Max number of cached pages. 4050982Sbostic * 4150982Sbostic * Returns: 4250982Sbostic * MPOOL pointer, NULL on error. 4350982Sbostic */ 4450982Sbostic MPOOL * 4550982Sbostic mpool_open(key, fd, pagesize, maxcache) 4650982Sbostic DBT *key; 4750982Sbostic int fd; 4850982Sbostic pgno_t pagesize, maxcache; 4950982Sbostic { 5050982Sbostic struct stat sb; 5150982Sbostic MPOOL *mp; 5250982Sbostic int entry; 5350982Sbostic 5450982Sbostic if (fstat(fd, &sb)) 5550982Sbostic return (NULL); 5650982Sbostic /* XXX 5750982Sbostic * We should only set st_size to 0 for pipes -- 4.4BSD has the fix so 5850982Sbostic * that stat(2) returns true for ISSOCK on pipes. Until then, this is 5950982Sbostic * fairly close. 6050982Sbostic */ 6150982Sbostic if (!S_ISREG(sb.st_mode)) { 6250982Sbostic errno = ESPIPE; 6350982Sbostic return (NULL); 6450982Sbostic } 6550982Sbostic 6650982Sbostic if ((mp = malloc(sizeof(MPOOL))) == NULL) 6750982Sbostic return (NULL); 6850982Sbostic mp->free.cnext = mp->free.cprev = (BKT *)&mp->free; 6950982Sbostic mp->lru.cnext = mp->lru.cprev = (BKT *)&mp->lru; 7050982Sbostic for (entry = 0; entry < HASHSIZE; ++entry) 7150982Sbostic mp->hashtable[entry].hnext = mp->hashtable[entry].hprev = 7250982Sbostic mp->hashtable[entry].cnext = mp->hashtable[entry].cprev = 7350982Sbostic (BKT *)&mp->hashtable[entry]; 7450982Sbostic mp->curcache = 0; 7550982Sbostic mp->maxcache = maxcache; 7650982Sbostic mp->pagesize = pagesize; 7750982Sbostic mp->npages = sb.st_size / pagesize; 7850982Sbostic mp->fd = fd; 7958072Sbostic mp->pgcookie = NULL; 8058072Sbostic mp->pgin = mp->pgout = NULL; 8150982Sbostic 8250982Sbostic #ifdef STATISTICS 8350982Sbostic mp->cachehit = mp->cachemiss = mp->pagealloc = mp->pageflush = 8450982Sbostic mp->pageget = mp->pagenew = mp->pageput = mp->pageread = 8550982Sbostic mp->pagewrite = 0; 8650982Sbostic #endif 8750982Sbostic return (mp); 8850982Sbostic } 8950982Sbostic 9050982Sbostic /* 9150982Sbostic * MPOOL_FILTER -- initialize input/output filters. 9250982Sbostic * 9350982Sbostic * Parameters: 9450982Sbostic * pgin: Page in conversion routine. 9550982Sbostic * pgout: Page out conversion routine. 9650982Sbostic * pgcookie: Cookie for page in/out routines. 9750982Sbostic */ 9850982Sbostic void 9950982Sbostic mpool_filter(mp, pgin, pgout, pgcookie) 10050982Sbostic MPOOL *mp; 10150982Sbostic void (*pgin) __P((void *, pgno_t, void *)); 10250982Sbostic void (*pgout) __P((void *, pgno_t, void *)); 10350982Sbostic void *pgcookie; 10450982Sbostic { 10550982Sbostic mp->pgin = pgin; 10650982Sbostic mp->pgout = pgout; 10750982Sbostic mp->pgcookie = pgcookie; 10850982Sbostic } 10950982Sbostic 11050982Sbostic /* 11150982Sbostic * MPOOL_NEW -- get a new page 11250982Sbostic * 11350982Sbostic * Parameters: 11450982Sbostic * mp: mpool cookie 11550982Sbostic * pgnoadddr: place to store new page number 11650982Sbostic * Returns: 11750982Sbostic * RET_ERROR, RET_SUCCESS 11850982Sbostic */ 11950982Sbostic void * 12050982Sbostic mpool_new(mp, pgnoaddr) 12150982Sbostic MPOOL *mp; 12250982Sbostic pgno_t *pgnoaddr; 12350982Sbostic { 12450982Sbostic BKT *b; 12550982Sbostic BKTHDR *hp; 12650982Sbostic 12750982Sbostic #ifdef STATISTICS 12850982Sbostic ++mp->pagenew; 12950982Sbostic #endif 13050982Sbostic /* 13150982Sbostic * Get a BKT from the cache. Assign a new page number, attach it to 13250982Sbostic * the hash and lru chains and return. 13350982Sbostic */ 13450982Sbostic if ((b = mpool_bkt(mp)) == NULL) 13550982Sbostic return (NULL); 13650982Sbostic *pgnoaddr = b->pgno = mp->npages++; 13750982Sbostic b->flags = MPOOL_PINNED; 13850982Sbostic inshash(b, b->pgno); 13950982Sbostic inschain(b, &mp->lru); 14050982Sbostic return (b->page); 14150982Sbostic } 14250982Sbostic 14350982Sbostic /* 14450982Sbostic * MPOOL_GET -- get a page from the pool 14550982Sbostic * 14650982Sbostic * Parameters: 14750982Sbostic * mp: mpool cookie 14850982Sbostic * pgno: page number 14950982Sbostic * flags: not used 15050982Sbostic * 15150982Sbostic * Returns: 15250982Sbostic * RET_ERROR, RET_SUCCESS 15350982Sbostic */ 15450982Sbostic void * 15550982Sbostic mpool_get(mp, pgno, flags) 15650982Sbostic MPOOL *mp; 15750982Sbostic pgno_t pgno; 15850982Sbostic u_int flags; /* XXX not used? */ 15950982Sbostic { 16050982Sbostic BKT *b; 16150982Sbostic BKTHDR *hp; 16250982Sbostic off_t off; 16350982Sbostic int nr; 16450982Sbostic 16550982Sbostic /* 16650982Sbostic * If asking for a specific page that is already in the cache, find 16750982Sbostic * it and return it. 16850982Sbostic */ 16950982Sbostic if (b = mpool_look(mp, pgno)) { 17050982Sbostic #ifdef STATISTICS 17150982Sbostic ++mp->pageget; 17250982Sbostic #endif 17350982Sbostic #ifdef DEBUG 17450982Sbostic if (b->flags & MPOOL_PINNED) 17562484Sbostic __mpoolerr("mpool_get: page %d already pinned", 17662484Sbostic b->pgno); 17750982Sbostic #endif 17850982Sbostic rmchain(b); 17950982Sbostic inschain(b, &mp->lru); 18050982Sbostic b->flags |= MPOOL_PINNED; 18150982Sbostic return (b->page); 18250982Sbostic } 18350982Sbostic 18450982Sbostic /* Not allowed to retrieve a non-existent page. */ 18550982Sbostic if (pgno >= mp->npages) { 18650982Sbostic errno = EINVAL; 18750982Sbostic return (NULL); 18850982Sbostic } 18950982Sbostic 19050982Sbostic /* Get a page from the cache. */ 19150982Sbostic if ((b = mpool_bkt(mp)) == NULL) 19250982Sbostic return (NULL); 19350982Sbostic b->pgno = pgno; 19450982Sbostic b->flags = MPOOL_PINNED; 19550982Sbostic 19650982Sbostic #ifdef STATISTICS 19750982Sbostic ++mp->pageread; 19850982Sbostic #endif 19950982Sbostic /* Read in the contents. */ 20050982Sbostic off = mp->pagesize * pgno; 20150982Sbostic if (lseek(mp->fd, off, SEEK_SET) != off) 20250982Sbostic return (NULL); 20350982Sbostic if ((nr = read(mp->fd, b->page, mp->pagesize)) != mp->pagesize) { 20450982Sbostic if (nr >= 0) 20550982Sbostic errno = EFTYPE; 20650982Sbostic return (NULL); 20750982Sbostic } 20850982Sbostic if (mp->pgin) 20950982Sbostic (mp->pgin)(mp->pgcookie, b->pgno, b->page); 21050982Sbostic 21150982Sbostic inshash(b, b->pgno); 21250982Sbostic inschain(b, &mp->lru); 21350982Sbostic #ifdef STATISTICS 21450982Sbostic ++mp->pageget; 21550982Sbostic #endif 21650982Sbostic return (b->page); 21750982Sbostic } 21850982Sbostic 21950982Sbostic /* 22050982Sbostic * MPOOL_PUT -- return a page to the pool 22150982Sbostic * 22250982Sbostic * Parameters: 22350982Sbostic * mp: mpool cookie 22450982Sbostic * page: page pointer 22550982Sbostic * pgno: page number 22650982Sbostic * 22750982Sbostic * Returns: 22850982Sbostic * RET_ERROR, RET_SUCCESS 22950982Sbostic */ 23050982Sbostic int 23150982Sbostic mpool_put(mp, page, flags) 23250982Sbostic MPOOL *mp; 23350982Sbostic void *page; 23450982Sbostic u_int flags; 23550982Sbostic { 23650982Sbostic BKT *baddr; 23750982Sbostic #ifdef DEBUG 23850982Sbostic BKT *b; 23950982Sbostic #endif 24050982Sbostic 24150982Sbostic #ifdef STATISTICS 24250982Sbostic ++mp->pageput; 24350982Sbostic #endif 24451766Sbostic baddr = (BKT *)((char *)page - sizeof(BKT)); 24550982Sbostic #ifdef DEBUG 24650982Sbostic if (!(baddr->flags & MPOOL_PINNED)) 24762484Sbostic __mpoolerr("mpool_put: page %d not pinned", b->pgno); 24850982Sbostic for (b = mp->lru.cnext; b != (BKT *)&mp->lru; b = b->cnext) { 24950982Sbostic if (b == (BKT *)&mp->lru) 25062484Sbostic __mpoolerr("mpool_put: %0x: bad address", baddr); 25150982Sbostic if (b == baddr) 25250982Sbostic break; 25350982Sbostic } 25450982Sbostic #endif 25550982Sbostic baddr->flags &= ~MPOOL_PINNED; 25650982Sbostic baddr->flags |= flags & MPOOL_DIRTY; 25750982Sbostic return (RET_SUCCESS); 25850982Sbostic } 25950982Sbostic 26050982Sbostic /* 26150982Sbostic * MPOOL_CLOSE -- close the buffer pool 26250982Sbostic * 26350982Sbostic * Parameters: 26450982Sbostic * mp: mpool cookie 26550982Sbostic * 26650982Sbostic * Returns: 26750982Sbostic * RET_ERROR, RET_SUCCESS 26850982Sbostic */ 26950982Sbostic int 27050982Sbostic mpool_close(mp) 27150982Sbostic MPOOL *mp; 27250982Sbostic { 27350982Sbostic BKT *b, *next; 27450982Sbostic 27550982Sbostic /* Free up any space allocated to the lru pages. */ 27650982Sbostic for (b = mp->lru.cprev; b != (BKT *)&mp->lru; b = next) { 27750982Sbostic next = b->cprev; 27850982Sbostic free(b); 27950982Sbostic } 28051802Sbostic free(mp); 28150982Sbostic return (RET_SUCCESS); 28250982Sbostic } 28350982Sbostic 28450982Sbostic /* 28550982Sbostic * MPOOL_SYNC -- sync the file to disk. 28650982Sbostic * 28750982Sbostic * Parameters: 28850982Sbostic * mp: mpool cookie 28950982Sbostic * 29050982Sbostic * Returns: 29150982Sbostic * RET_ERROR, RET_SUCCESS 29250982Sbostic */ 29350982Sbostic int 29450982Sbostic mpool_sync(mp) 29550982Sbostic MPOOL *mp; 29650982Sbostic { 29750982Sbostic BKT *b; 29850982Sbostic 29950982Sbostic for (b = mp->lru.cprev; b != (BKT *)&mp->lru; b = b->cprev) 30050982Sbostic if (b->flags & MPOOL_DIRTY && mpool_write(mp, b) == RET_ERROR) 30150982Sbostic return (RET_ERROR); 30250982Sbostic return (fsync(mp->fd) ? RET_ERROR : RET_SUCCESS); 30350982Sbostic } 30450982Sbostic 30550982Sbostic /* 30650982Sbostic * MPOOL_BKT -- get/create a BKT from the cache 30750982Sbostic * 30850982Sbostic * Parameters: 30950982Sbostic * mp: mpool cookie 31050982Sbostic * 31150982Sbostic * Returns: 31250982Sbostic * NULL on failure and a pointer to the BKT on success 31350982Sbostic */ 31450982Sbostic static BKT * 31550982Sbostic mpool_bkt(mp) 31650982Sbostic MPOOL *mp; 31750982Sbostic { 31850982Sbostic BKT *b; 31950982Sbostic 32050982Sbostic if (mp->curcache < mp->maxcache) 32150982Sbostic goto new; 32250982Sbostic 32350982Sbostic /* 32450982Sbostic * If the cache is maxxed out, search the lru list for a buffer we 32550982Sbostic * can flush. If we find one, write it if necessary and take it off 32650982Sbostic * any lists. If we don't find anything we grow the cache anyway. 32750982Sbostic * The cache never shrinks. 32850982Sbostic */ 32950982Sbostic for (b = mp->lru.cprev; b != (BKT *)&mp->lru; b = b->cprev) 33050982Sbostic if (!(b->flags & MPOOL_PINNED)) { 33150982Sbostic if (b->flags & MPOOL_DIRTY && 33250982Sbostic mpool_write(mp, b) == RET_ERROR) 33350982Sbostic return (NULL); 33450982Sbostic rmhash(b); 33550982Sbostic rmchain(b); 33650982Sbostic #ifdef STATISTICS 33750982Sbostic ++mp->pageflush; 33850982Sbostic #endif 33950982Sbostic #ifdef DEBUG 34050982Sbostic { 34150982Sbostic void *spage; 34250982Sbostic spage = b->page; 34350982Sbostic memset(b, 0xff, sizeof(BKT) + mp->pagesize); 34450982Sbostic b->page = spage; 34550982Sbostic } 34650982Sbostic #endif 34750982Sbostic return (b); 34850982Sbostic } 34950982Sbostic 35050982Sbostic new: if ((b = malloc(sizeof(BKT) + mp->pagesize)) == NULL) 35150982Sbostic return (NULL); 35250982Sbostic #ifdef STATISTICS 35350982Sbostic ++mp->pagealloc; 35450982Sbostic #endif 35550982Sbostic #ifdef DEBUG 35650982Sbostic memset(b, 0xff, sizeof(BKT) + mp->pagesize); 35750982Sbostic #endif 35850982Sbostic b->page = (char *)b + sizeof(BKT); 35950982Sbostic ++mp->curcache; 36050982Sbostic return (b); 36150982Sbostic } 36250982Sbostic 36350982Sbostic /* 36450982Sbostic * MPOOL_WRITE -- sync a page to disk 36550982Sbostic * 36650982Sbostic * Parameters: 36750982Sbostic * mp: mpool cookie 36850982Sbostic * 36950982Sbostic * Returns: 37050982Sbostic * RET_ERROR, RET_SUCCESS 37150982Sbostic */ 37250982Sbostic static int 37350982Sbostic mpool_write(mp, b) 37450982Sbostic MPOOL *mp; 37550982Sbostic BKT *b; 37650982Sbostic { 37750982Sbostic off_t off; 37850982Sbostic 37950982Sbostic if (mp->pgout) 38050982Sbostic (mp->pgout)(mp->pgcookie, b->pgno, b->page); 38150982Sbostic 38250982Sbostic #ifdef STATISTICS 38350982Sbostic ++mp->pagewrite; 38450982Sbostic #endif 38550982Sbostic off = mp->pagesize * b->pgno; 38650982Sbostic if (lseek(mp->fd, off, SEEK_SET) != off) 38750982Sbostic return (RET_ERROR); 38850982Sbostic if (write(mp->fd, b->page, mp->pagesize) != mp->pagesize) 38950982Sbostic return (RET_ERROR); 39050982Sbostic b->flags &= ~MPOOL_DIRTY; 39150982Sbostic return (RET_SUCCESS); 39250982Sbostic } 39350982Sbostic 39450982Sbostic /* 39550982Sbostic * MPOOL_LOOK -- lookup a page 39650982Sbostic * 39750982Sbostic * Parameters: 39850982Sbostic * mp: mpool cookie 39950982Sbostic * pgno: page number 40050982Sbostic * 40150982Sbostic * Returns: 40250982Sbostic * NULL on failure and a pointer to the BKT on success 40350982Sbostic */ 40450982Sbostic static BKT * 40550982Sbostic mpool_look(mp, pgno) 40650982Sbostic MPOOL *mp; 40750982Sbostic pgno_t pgno; 40850982Sbostic { 40950982Sbostic register BKT *b; 41050982Sbostic register BKTHDR *tb; 41150982Sbostic 41250982Sbostic /* XXX 41350982Sbostic * If find the buffer, put it first on the hash chain so can 41450982Sbostic * find it again quickly. 41550982Sbostic */ 41650982Sbostic tb = &mp->hashtable[HASHKEY(pgno)]; 41750982Sbostic for (b = tb->hnext; b != (BKT *)tb; b = b->hnext) 41850982Sbostic if (b->pgno == pgno) { 41950982Sbostic #ifdef STATISTICS 42050982Sbostic ++mp->cachehit; 42150982Sbostic #endif 42250982Sbostic return (b); 42350982Sbostic } 42450982Sbostic #ifdef STATISTICS 42550982Sbostic ++mp->cachemiss; 42650982Sbostic #endif 42750982Sbostic return (NULL); 42850982Sbostic } 42950982Sbostic 43050982Sbostic #ifdef STATISTICS 43150982Sbostic /* 43250982Sbostic * MPOOL_STAT -- cache statistics 43350982Sbostic * 43450982Sbostic * Parameters: 43550982Sbostic * mp: mpool cookie 43650982Sbostic */ 43750982Sbostic void 43850982Sbostic mpool_stat(mp) 43950982Sbostic MPOOL *mp; 44050982Sbostic { 44150982Sbostic BKT *b; 44250982Sbostic int cnt; 44350982Sbostic char *sep; 44450982Sbostic 44550982Sbostic (void)fprintf(stderr, "%lu pages in the file\n", mp->npages); 44650982Sbostic (void)fprintf(stderr, 44750982Sbostic "page size %lu, cacheing %lu pages of %lu page max cache\n", 44850982Sbostic mp->pagesize, mp->curcache, mp->maxcache); 44950982Sbostic (void)fprintf(stderr, "%lu page puts, %lu page gets, %lu page new\n", 45050982Sbostic mp->pageput, mp->pageget, mp->pagenew); 45150982Sbostic (void)fprintf(stderr, "%lu page allocs, %lu page flushes\n", 45250982Sbostic mp->pagealloc, mp->pageflush); 45350982Sbostic if (mp->cachehit + mp->cachemiss) 45450982Sbostic (void)fprintf(stderr, 45550982Sbostic "%.0f%% cache hit rate (%lu hits, %lu misses)\n", 45650982Sbostic ((double)mp->cachehit / (mp->cachehit + mp->cachemiss)) 45750982Sbostic * 100, mp->cachehit, mp->cachemiss); 45850982Sbostic (void)fprintf(stderr, "%lu page reads, %lu page writes\n", 45950982Sbostic mp->pageread, mp->pagewrite); 46050982Sbostic 46150982Sbostic sep = ""; 46250982Sbostic cnt = 0; 46350982Sbostic for (b = mp->lru.cnext; b != (BKT *)&mp->lru; b = b->cnext) { 46450982Sbostic (void)fprintf(stderr, "%s%d", sep, b->pgno); 46550982Sbostic if (b->flags & MPOOL_DIRTY) 46650982Sbostic (void)fprintf(stderr, "d"); 46750982Sbostic if (b->flags & MPOOL_PINNED) 46850982Sbostic (void)fprintf(stderr, "P"); 46950982Sbostic if (++cnt == 10) { 47050982Sbostic sep = "\n"; 47150982Sbostic cnt = 0; 47250982Sbostic } else 47350982Sbostic sep = ", "; 47450982Sbostic 47550982Sbostic } 47650982Sbostic (void)fprintf(stderr, "\n"); 47750982Sbostic } 47850982Sbostic #endif 47950982Sbostic 48050982Sbostic #ifdef DEBUG 48150982Sbostic #if __STDC__ 48250982Sbostic #include <stdarg.h> 48350982Sbostic #else 48450982Sbostic #include <varargs.h> 48550982Sbostic #endif 48650982Sbostic 48750982Sbostic static void 48850982Sbostic #if __STDC__ 48962484Sbostic __mpoolerr(const char *fmt, ...) 49050982Sbostic #else 49162484Sbostic __mpoolerr(fmt, va_alist) 49250982Sbostic char *fmt; 49350982Sbostic va_dcl 49450982Sbostic #endif 49550982Sbostic { 49650982Sbostic va_list ap; 49750982Sbostic #if __STDC__ 49850982Sbostic va_start(ap, fmt); 49950982Sbostic #else 50050982Sbostic va_start(ap); 50150982Sbostic #endif 50250982Sbostic (void)vfprintf(stderr, fmt, ap); 50350982Sbostic va_end(ap); 50450982Sbostic (void)fprintf(stderr, "\n"); 50550982Sbostic abort(); 50650982Sbostic /* NOTREACHED */ 50750982Sbostic } 50850982Sbostic #endif 509