150982Sbostic /*- 250982Sbostic * Copyright (c) 1990 The Regents of the University of California. 350982Sbostic * All rights reserved. 450982Sbostic * 550982Sbostic * %sccs.include.redist.c% 650982Sbostic */ 750982Sbostic 850982Sbostic #if defined(LIBC_SCCS) && !defined(lint) 9*51766Sbostic static char sccsid[] = "@(#)mpool.c 5.2 (Berkeley) 11/20/91"; 1050982Sbostic #endif /* LIBC_SCCS and not lint */ 1150982Sbostic 1250982Sbostic #include <sys/param.h> 1350982Sbostic #include <sys/stat.h> 1450982Sbostic #include <errno.h> 1550982Sbostic #include <db.h> 1650982Sbostic #include <unistd.h> 1750982Sbostic #include <stdio.h> 1850982Sbostic #include <stdlib.h> 1950982Sbostic #include <string.h> 2050982Sbostic #define __MPOOLINTERFACE_PRIVATE 2150982Sbostic #include "mpool.h" 2250982Sbostic 2350982Sbostic static BKT *mpool_bkt __P((MPOOL *)); 2450982Sbostic static BKT *mpool_look __P((MPOOL *, pgno_t)); 2550982Sbostic static int mpool_write __P((MPOOL *, BKT *)); 2650982Sbostic #ifdef DEBUG 2750982Sbostic static void err __P((const char *fmt, ...)); 2850982Sbostic #endif 2950982Sbostic 3050982Sbostic /* 3150982Sbostic * MPOOL_OPEN -- initialize a memory pool. 3250982Sbostic * 3350982Sbostic * Parameters: 3450982Sbostic * key: Shared buffer key. 3550982Sbostic * fd: File descriptor. 3650982Sbostic * pagesize: File page size. 3750982Sbostic * maxcache: Max number of cached pages. 3850982Sbostic * 3950982Sbostic * Returns: 4050982Sbostic * MPOOL pointer, NULL on error. 4150982Sbostic */ 4250982Sbostic MPOOL * 4350982Sbostic mpool_open(key, fd, pagesize, maxcache) 4450982Sbostic DBT *key; 4550982Sbostic int fd; 4650982Sbostic pgno_t pagesize, maxcache; 4750982Sbostic { 4850982Sbostic struct stat sb; 4950982Sbostic MPOOL *mp; 5050982Sbostic int entry; 5150982Sbostic 5250982Sbostic if (fstat(fd, &sb)) 5350982Sbostic return (NULL); 5450982Sbostic /* XXX 5550982Sbostic * We should only set st_size to 0 for pipes -- 4.4BSD has the fix so 5650982Sbostic * that stat(2) returns true for ISSOCK on pipes. Until then, this is 5750982Sbostic * fairly close. 5850982Sbostic */ 5950982Sbostic if (!S_ISREG(sb.st_mode)) { 6050982Sbostic errno = ESPIPE; 6150982Sbostic return (NULL); 6250982Sbostic } 6350982Sbostic 6450982Sbostic if ((mp = malloc(sizeof(MPOOL))) == NULL) 6550982Sbostic return (NULL); 6650982Sbostic mp->free.cnext = mp->free.cprev = (BKT *)&mp->free; 6750982Sbostic mp->lru.cnext = mp->lru.cprev = (BKT *)&mp->lru; 6850982Sbostic for (entry = 0; entry < HASHSIZE; ++entry) 6950982Sbostic mp->hashtable[entry].hnext = mp->hashtable[entry].hprev = 7050982Sbostic mp->hashtable[entry].cnext = mp->hashtable[entry].cprev = 7150982Sbostic (BKT *)&mp->hashtable[entry]; 7250982Sbostic mp->curcache = 0; 7350982Sbostic mp->maxcache = maxcache; 7450982Sbostic mp->pagesize = pagesize; 7550982Sbostic mp->npages = sb.st_size / pagesize; 7650982Sbostic mp->fd = fd; 7750982Sbostic 7850982Sbostic #ifdef STATISTICS 7950982Sbostic mp->cachehit = mp->cachemiss = mp->pagealloc = mp->pageflush = 8050982Sbostic mp->pageget = mp->pagenew = mp->pageput = mp->pageread = 8150982Sbostic mp->pagewrite = 0; 8250982Sbostic #endif 8350982Sbostic return (mp); 8450982Sbostic } 8550982Sbostic 8650982Sbostic /* 8750982Sbostic * MPOOL_FILTER -- initialize input/output filters. 8850982Sbostic * 8950982Sbostic * Parameters: 9050982Sbostic * pgin: Page in conversion routine. 9150982Sbostic * pgout: Page out conversion routine. 9250982Sbostic * pgcookie: Cookie for page in/out routines. 9350982Sbostic */ 9450982Sbostic void 9550982Sbostic mpool_filter(mp, pgin, pgout, pgcookie) 9650982Sbostic MPOOL *mp; 9750982Sbostic void (*pgin) __P((void *, pgno_t, void *)); 9850982Sbostic void (*pgout) __P((void *, pgno_t, void *)); 9950982Sbostic void *pgcookie; 10050982Sbostic { 10150982Sbostic mp->pgin = pgin; 10250982Sbostic mp->pgout = pgout; 10350982Sbostic mp->pgcookie = pgcookie; 10450982Sbostic } 10550982Sbostic 10650982Sbostic /* 10750982Sbostic * MPOOL_NEW -- get a new page 10850982Sbostic * 10950982Sbostic * Parameters: 11050982Sbostic * mp: mpool cookie 11150982Sbostic * pgnoadddr: place to store new page number 11250982Sbostic * Returns: 11350982Sbostic * RET_ERROR, RET_SUCCESS 11450982Sbostic */ 11550982Sbostic void * 11650982Sbostic mpool_new(mp, pgnoaddr) 11750982Sbostic MPOOL *mp; 11850982Sbostic pgno_t *pgnoaddr; 11950982Sbostic { 12050982Sbostic BKT *b; 12150982Sbostic BKTHDR *hp; 12250982Sbostic 12350982Sbostic #ifdef STATISTICS 12450982Sbostic ++mp->pagenew; 12550982Sbostic #endif 12650982Sbostic /* 12750982Sbostic * Get a BKT from the cache. Assign a new page number, attach it to 12850982Sbostic * the hash and lru chains and return. 12950982Sbostic */ 13050982Sbostic if ((b = mpool_bkt(mp)) == NULL) 13150982Sbostic return (NULL); 13250982Sbostic *pgnoaddr = b->pgno = mp->npages++; 13350982Sbostic b->flags = MPOOL_PINNED; 13450982Sbostic inshash(b, b->pgno); 13550982Sbostic inschain(b, &mp->lru); 13650982Sbostic return (b->page); 13750982Sbostic } 13850982Sbostic 13950982Sbostic /* 14050982Sbostic * MPOOL_GET -- get a page from the pool 14150982Sbostic * 14250982Sbostic * Parameters: 14350982Sbostic * mp: mpool cookie 14450982Sbostic * pgno: page number 14550982Sbostic * flags: not used 14650982Sbostic * 14750982Sbostic * Returns: 14850982Sbostic * RET_ERROR, RET_SUCCESS 14950982Sbostic */ 15050982Sbostic void * 15150982Sbostic mpool_get(mp, pgno, flags) 15250982Sbostic MPOOL *mp; 15350982Sbostic pgno_t pgno; 15450982Sbostic u_int flags; /* XXX not used? */ 15550982Sbostic { 15650982Sbostic BKT *b; 15750982Sbostic BKTHDR *hp; 15850982Sbostic off_t off; 15950982Sbostic int nr; 16050982Sbostic 16150982Sbostic /* 16250982Sbostic * If asking for a specific page that is already in the cache, find 16350982Sbostic * it and return it. 16450982Sbostic */ 16550982Sbostic if (b = mpool_look(mp, pgno)) { 16650982Sbostic #ifdef STATISTICS 16750982Sbostic ++mp->pageget; 16850982Sbostic #endif 16950982Sbostic #ifdef DEBUG 17050982Sbostic if (b->flags & MPOOL_PINNED) 17150982Sbostic err("mpool_get: page %d already pinned", b->pgno); 17250982Sbostic #endif 17350982Sbostic rmchain(b); 17450982Sbostic inschain(b, &mp->lru); 17550982Sbostic b->flags |= MPOOL_PINNED; 17650982Sbostic return (b->page); 17750982Sbostic } 17850982Sbostic 17950982Sbostic /* Not allowed to retrieve a non-existent page. */ 18050982Sbostic if (pgno >= mp->npages) { 18150982Sbostic errno = EINVAL; 18250982Sbostic return (NULL); 18350982Sbostic } 18450982Sbostic 18550982Sbostic /* Get a page from the cache. */ 18650982Sbostic if ((b = mpool_bkt(mp)) == NULL) 18750982Sbostic return (NULL); 18850982Sbostic b->pgno = pgno; 18950982Sbostic b->flags = MPOOL_PINNED; 19050982Sbostic 19150982Sbostic #ifdef STATISTICS 19250982Sbostic ++mp->pageread; 19350982Sbostic #endif 19450982Sbostic /* Read in the contents. */ 19550982Sbostic off = mp->pagesize * pgno; 19650982Sbostic if (lseek(mp->fd, off, SEEK_SET) != off) 19750982Sbostic return (NULL); 19850982Sbostic if ((nr = read(mp->fd, b->page, mp->pagesize)) != mp->pagesize) { 19950982Sbostic if (nr >= 0) 20050982Sbostic errno = EFTYPE; 20150982Sbostic return (NULL); 20250982Sbostic } 20350982Sbostic if (mp->pgin) 20450982Sbostic (mp->pgin)(mp->pgcookie, b->pgno, b->page); 20550982Sbostic 20650982Sbostic inshash(b, b->pgno); 20750982Sbostic inschain(b, &mp->lru); 20850982Sbostic #ifdef STATISTICS 20950982Sbostic ++mp->pageget; 21050982Sbostic #endif 21150982Sbostic return (b->page); 21250982Sbostic } 21350982Sbostic 21450982Sbostic /* 21550982Sbostic * MPOOL_PUT -- return a page to the pool 21650982Sbostic * 21750982Sbostic * Parameters: 21850982Sbostic * mp: mpool cookie 21950982Sbostic * page: page pointer 22050982Sbostic * pgno: page number 22150982Sbostic * 22250982Sbostic * Returns: 22350982Sbostic * RET_ERROR, RET_SUCCESS 22450982Sbostic */ 22550982Sbostic int 22650982Sbostic mpool_put(mp, page, flags) 22750982Sbostic MPOOL *mp; 22850982Sbostic void *page; 22950982Sbostic u_int flags; 23050982Sbostic { 23150982Sbostic BKT *baddr; 23250982Sbostic #ifdef DEBUG 23350982Sbostic BKT *b; 23450982Sbostic #endif 23550982Sbostic 23650982Sbostic #ifdef STATISTICS 23750982Sbostic ++mp->pageput; 23850982Sbostic #endif 239*51766Sbostic baddr = (BKT *)((char *)page - sizeof(BKT)); 24050982Sbostic #ifdef DEBUG 24150982Sbostic if (!(baddr->flags & MPOOL_PINNED)) 24250982Sbostic err("mpool_put: page %d not pinned", b->pgno); 24350982Sbostic for (b = mp->lru.cnext; b != (BKT *)&mp->lru; b = b->cnext) { 24450982Sbostic if (b == (BKT *)&mp->lru) 24550982Sbostic err("mpool_put: %0x: bad address", baddr); 24650982Sbostic if (b == baddr) 24750982Sbostic break; 24850982Sbostic } 24950982Sbostic #endif 25050982Sbostic baddr->flags &= ~MPOOL_PINNED; 25150982Sbostic baddr->flags |= flags & MPOOL_DIRTY; 25250982Sbostic return (RET_SUCCESS); 25350982Sbostic } 25450982Sbostic 25550982Sbostic /* 25650982Sbostic * MPOOL_CLOSE -- close the buffer pool 25750982Sbostic * 25850982Sbostic * Parameters: 25950982Sbostic * mp: mpool cookie 26050982Sbostic * 26150982Sbostic * Returns: 26250982Sbostic * RET_ERROR, RET_SUCCESS 26350982Sbostic */ 26450982Sbostic int 26550982Sbostic mpool_close(mp) 26650982Sbostic MPOOL *mp; 26750982Sbostic { 26850982Sbostic BKT *b, *next; 26950982Sbostic 27050982Sbostic /* Free up any space allocated to the lru pages. */ 27150982Sbostic for (b = mp->lru.cprev; b != (BKT *)&mp->lru; b = next) { 27250982Sbostic next = b->cprev; 27350982Sbostic free(b->page); 27450982Sbostic free(b); 27550982Sbostic } 27650982Sbostic return (RET_SUCCESS); 27750982Sbostic } 27850982Sbostic 27950982Sbostic /* 28050982Sbostic * MPOOL_SYNC -- sync the file to disk. 28150982Sbostic * 28250982Sbostic * Parameters: 28350982Sbostic * mp: mpool cookie 28450982Sbostic * 28550982Sbostic * Returns: 28650982Sbostic * RET_ERROR, RET_SUCCESS 28750982Sbostic */ 28850982Sbostic int 28950982Sbostic mpool_sync(mp) 29050982Sbostic MPOOL *mp; 29150982Sbostic { 29250982Sbostic BKT *b; 29350982Sbostic 29450982Sbostic for (b = mp->lru.cprev; b != (BKT *)&mp->lru; b = b->cprev) 29550982Sbostic if (b->flags & MPOOL_DIRTY && mpool_write(mp, b) == RET_ERROR) 29650982Sbostic return (RET_ERROR); 29750982Sbostic return (fsync(mp->fd) ? RET_ERROR : RET_SUCCESS); 29850982Sbostic } 29950982Sbostic 30050982Sbostic /* 30150982Sbostic * MPOOL_BKT -- get/create a BKT from the cache 30250982Sbostic * 30350982Sbostic * Parameters: 30450982Sbostic * mp: mpool cookie 30550982Sbostic * 30650982Sbostic * Returns: 30750982Sbostic * NULL on failure and a pointer to the BKT on success 30850982Sbostic */ 30950982Sbostic static BKT * 31050982Sbostic mpool_bkt(mp) 31150982Sbostic MPOOL *mp; 31250982Sbostic { 31350982Sbostic BKT *b; 31450982Sbostic 31550982Sbostic if (mp->curcache < mp->maxcache) 31650982Sbostic goto new; 31750982Sbostic 31850982Sbostic /* 31950982Sbostic * If the cache is maxxed out, search the lru list for a buffer we 32050982Sbostic * can flush. If we find one, write it if necessary and take it off 32150982Sbostic * any lists. If we don't find anything we grow the cache anyway. 32250982Sbostic * The cache never shrinks. 32350982Sbostic */ 32450982Sbostic for (b = mp->lru.cprev; b != (BKT *)&mp->lru; b = b->cprev) 32550982Sbostic if (!(b->flags & MPOOL_PINNED)) { 32650982Sbostic if (b->flags & MPOOL_DIRTY && 32750982Sbostic mpool_write(mp, b) == RET_ERROR) 32850982Sbostic return (NULL); 32950982Sbostic rmhash(b); 33050982Sbostic rmchain(b); 33150982Sbostic #ifdef STATISTICS 33250982Sbostic ++mp->pageflush; 33350982Sbostic #endif 33450982Sbostic #ifdef DEBUG 33550982Sbostic { 33650982Sbostic void *spage; 33750982Sbostic spage = b->page; 33850982Sbostic memset(b, 0xff, sizeof(BKT) + mp->pagesize); 33950982Sbostic b->page = spage; 34050982Sbostic } 34150982Sbostic #endif 34250982Sbostic return (b); 34350982Sbostic } 34450982Sbostic 34550982Sbostic new: if ((b = malloc(sizeof(BKT) + mp->pagesize)) == NULL) 34650982Sbostic return (NULL); 34750982Sbostic #ifdef STATISTICS 34850982Sbostic ++mp->pagealloc; 34950982Sbostic #endif 35050982Sbostic #ifdef DEBUG 35150982Sbostic memset(b, 0xff, sizeof(BKT) + mp->pagesize); 35250982Sbostic #endif 35350982Sbostic b->page = (char *)b + sizeof(BKT); 35450982Sbostic ++mp->curcache; 35550982Sbostic return (b); 35650982Sbostic } 35750982Sbostic 35850982Sbostic /* 35950982Sbostic * MPOOL_WRITE -- sync a page to disk 36050982Sbostic * 36150982Sbostic * Parameters: 36250982Sbostic * mp: mpool cookie 36350982Sbostic * 36450982Sbostic * Returns: 36550982Sbostic * RET_ERROR, RET_SUCCESS 36650982Sbostic */ 36750982Sbostic static int 36850982Sbostic mpool_write(mp, b) 36950982Sbostic MPOOL *mp; 37050982Sbostic BKT *b; 37150982Sbostic { 37250982Sbostic off_t off; 37350982Sbostic 37450982Sbostic if (mp->pgout) 37550982Sbostic (mp->pgout)(mp->pgcookie, b->pgno, b->page); 37650982Sbostic 37750982Sbostic #ifdef STATISTICS 37850982Sbostic ++mp->pagewrite; 37950982Sbostic #endif 38050982Sbostic off = mp->pagesize * b->pgno; 38150982Sbostic if (lseek(mp->fd, off, SEEK_SET) != off) 38250982Sbostic return (RET_ERROR); 38350982Sbostic if (write(mp->fd, b->page, mp->pagesize) != mp->pagesize) 38450982Sbostic return (RET_ERROR); 38550982Sbostic b->flags &= ~MPOOL_DIRTY; 38650982Sbostic return (RET_SUCCESS); 38750982Sbostic } 38850982Sbostic 38950982Sbostic /* 39050982Sbostic * MPOOL_LOOK -- lookup a page 39150982Sbostic * 39250982Sbostic * Parameters: 39350982Sbostic * mp: mpool cookie 39450982Sbostic * pgno: page number 39550982Sbostic * 39650982Sbostic * Returns: 39750982Sbostic * NULL on failure and a pointer to the BKT on success 39850982Sbostic */ 39950982Sbostic static BKT * 40050982Sbostic mpool_look(mp, pgno) 40150982Sbostic MPOOL *mp; 40250982Sbostic pgno_t pgno; 40350982Sbostic { 40450982Sbostic register BKT *b; 40550982Sbostic register BKTHDR *tb; 40650982Sbostic 40750982Sbostic /* XXX 40850982Sbostic * If find the buffer, put it first on the hash chain so can 40950982Sbostic * find it again quickly. 41050982Sbostic */ 41150982Sbostic tb = &mp->hashtable[HASHKEY(pgno)]; 41250982Sbostic for (b = tb->hnext; b != (BKT *)tb; b = b->hnext) 41350982Sbostic if (b->pgno == pgno) { 41450982Sbostic #ifdef STATISTICS 41550982Sbostic ++mp->cachehit; 41650982Sbostic #endif 41750982Sbostic return (b); 41850982Sbostic } 41950982Sbostic #ifdef STATISTICS 42050982Sbostic ++mp->cachemiss; 42150982Sbostic #endif 42250982Sbostic return (NULL); 42350982Sbostic } 42450982Sbostic 42550982Sbostic #ifdef STATISTICS 42650982Sbostic /* 42750982Sbostic * MPOOL_STAT -- cache statistics 42850982Sbostic * 42950982Sbostic * Parameters: 43050982Sbostic * mp: mpool cookie 43150982Sbostic */ 43250982Sbostic void 43350982Sbostic mpool_stat(mp) 43450982Sbostic MPOOL *mp; 43550982Sbostic { 43650982Sbostic BKT *b; 43750982Sbostic int cnt; 43850982Sbostic char *sep; 43950982Sbostic 44050982Sbostic (void)fprintf(stderr, "%lu pages in the file\n", mp->npages); 44150982Sbostic (void)fprintf(stderr, 44250982Sbostic "page size %lu, cacheing %lu pages of %lu page max cache\n", 44350982Sbostic mp->pagesize, mp->curcache, mp->maxcache); 44450982Sbostic (void)fprintf(stderr, "%lu page puts, %lu page gets, %lu page new\n", 44550982Sbostic mp->pageput, mp->pageget, mp->pagenew); 44650982Sbostic (void)fprintf(stderr, "%lu page allocs, %lu page flushes\n", 44750982Sbostic mp->pagealloc, mp->pageflush); 44850982Sbostic if (mp->cachehit + mp->cachemiss) 44950982Sbostic (void)fprintf(stderr, 45050982Sbostic "%.0f%% cache hit rate (%lu hits, %lu misses)\n", 45150982Sbostic ((double)mp->cachehit / (mp->cachehit + mp->cachemiss)) 45250982Sbostic * 100, mp->cachehit, mp->cachemiss); 45350982Sbostic (void)fprintf(stderr, "%lu page reads, %lu page writes\n", 45450982Sbostic mp->pageread, mp->pagewrite); 45550982Sbostic 45650982Sbostic sep = ""; 45750982Sbostic cnt = 0; 45850982Sbostic for (b = mp->lru.cnext; b != (BKT *)&mp->lru; b = b->cnext) { 45950982Sbostic (void)fprintf(stderr, "%s%d", sep, b->pgno); 46050982Sbostic if (b->flags & MPOOL_DIRTY) 46150982Sbostic (void)fprintf(stderr, "d"); 46250982Sbostic if (b->flags & MPOOL_PINNED) 46350982Sbostic (void)fprintf(stderr, "P"); 46450982Sbostic if (++cnt == 10) { 46550982Sbostic sep = "\n"; 46650982Sbostic cnt = 0; 46750982Sbostic } else 46850982Sbostic sep = ", "; 46950982Sbostic 47050982Sbostic } 47150982Sbostic (void)fprintf(stderr, "\n"); 47250982Sbostic } 47350982Sbostic #endif 47450982Sbostic 47550982Sbostic #ifdef DEBUG 47650982Sbostic #if __STDC__ 47750982Sbostic #include <stdarg.h> 47850982Sbostic #else 47950982Sbostic #include <varargs.h> 48050982Sbostic #endif 48150982Sbostic 48250982Sbostic static void 48350982Sbostic #if __STDC__ 48450982Sbostic err(const char *fmt, ...) 48550982Sbostic #else 48650982Sbostic err(fmt, va_alist) 48750982Sbostic char *fmt; 48850982Sbostic va_dcl 48950982Sbostic #endif 49050982Sbostic { 49150982Sbostic va_list ap; 49250982Sbostic #if __STDC__ 49350982Sbostic va_start(ap, fmt); 49450982Sbostic #else 49550982Sbostic va_start(ap); 49650982Sbostic #endif 49750982Sbostic (void)vfprintf(stderr, fmt, ap); 49850982Sbostic va_end(ap); 49950982Sbostic (void)fprintf(stderr, "\n"); 50050982Sbostic abort(); 50150982Sbostic /* NOTREACHED */ 50250982Sbostic } 50350982Sbostic #endif 504