150982Sbostic /*- 250982Sbostic * Copyright (c) 1990 The Regents of the University of California. 350982Sbostic * All rights reserved. 450982Sbostic * 550982Sbostic * %sccs.include.redist.c% 650982Sbostic */ 750982Sbostic 850982Sbostic #if defined(LIBC_SCCS) && !defined(lint) 9*57933Sbostic static char sccsid[] = "@(#)mpool.c 5.4 (Berkeley) 02/11/93"; 1050982Sbostic #endif /* LIBC_SCCS and not lint */ 1150982Sbostic 1250982Sbostic #include <sys/param.h> 1350982Sbostic #include <sys/stat.h> 14*57933Sbostic 1550982Sbostic #include <errno.h> 1650982Sbostic #include <stdio.h> 1750982Sbostic #include <stdlib.h> 1850982Sbostic #include <string.h> 19*57933Sbostic #include <unistd.h> 20*57933Sbostic 21*57933Sbostic #include <db.h> 2250982Sbostic #define __MPOOLINTERFACE_PRIVATE 2350982Sbostic #include "mpool.h" 2450982Sbostic 2550982Sbostic static BKT *mpool_bkt __P((MPOOL *)); 2650982Sbostic static BKT *mpool_look __P((MPOOL *, pgno_t)); 2750982Sbostic static int mpool_write __P((MPOOL *, BKT *)); 2850982Sbostic #ifdef DEBUG 2950982Sbostic static void err __P((const char *fmt, ...)); 3050982Sbostic #endif 3150982Sbostic 3250982Sbostic /* 3350982Sbostic * MPOOL_OPEN -- initialize a memory pool. 3450982Sbostic * 3550982Sbostic * Parameters: 3650982Sbostic * key: Shared buffer key. 3750982Sbostic * fd: File descriptor. 3850982Sbostic * pagesize: File page size. 3950982Sbostic * maxcache: Max number of cached pages. 4050982Sbostic * 4150982Sbostic * Returns: 4250982Sbostic * MPOOL pointer, NULL on error. 4350982Sbostic */ 4450982Sbostic MPOOL * 4550982Sbostic mpool_open(key, fd, pagesize, maxcache) 4650982Sbostic DBT *key; 4750982Sbostic int fd; 4850982Sbostic pgno_t pagesize, maxcache; 4950982Sbostic { 5050982Sbostic struct stat sb; 5150982Sbostic MPOOL *mp; 5250982Sbostic int entry; 5350982Sbostic 5450982Sbostic if (fstat(fd, &sb)) 5550982Sbostic return (NULL); 5650982Sbostic /* XXX 5750982Sbostic * We should only set st_size to 0 for pipes -- 4.4BSD has the fix so 5850982Sbostic * that stat(2) returns true for ISSOCK on pipes. Until then, this is 5950982Sbostic * fairly close. 6050982Sbostic */ 6150982Sbostic if (!S_ISREG(sb.st_mode)) { 6250982Sbostic errno = ESPIPE; 6350982Sbostic return (NULL); 6450982Sbostic } 6550982Sbostic 6650982Sbostic if ((mp = malloc(sizeof(MPOOL))) == NULL) 6750982Sbostic return (NULL); 6850982Sbostic mp->free.cnext = mp->free.cprev = (BKT *)&mp->free; 6950982Sbostic mp->lru.cnext = mp->lru.cprev = (BKT *)&mp->lru; 7050982Sbostic for (entry = 0; entry < HASHSIZE; ++entry) 7150982Sbostic mp->hashtable[entry].hnext = mp->hashtable[entry].hprev = 7250982Sbostic mp->hashtable[entry].cnext = mp->hashtable[entry].cprev = 7350982Sbostic (BKT *)&mp->hashtable[entry]; 7450982Sbostic mp->curcache = 0; 7550982Sbostic mp->maxcache = maxcache; 7650982Sbostic mp->pagesize = pagesize; 7750982Sbostic mp->npages = sb.st_size / pagesize; 7850982Sbostic mp->fd = fd; 7950982Sbostic 8050982Sbostic #ifdef STATISTICS 8150982Sbostic mp->cachehit = mp->cachemiss = mp->pagealloc = mp->pageflush = 8250982Sbostic mp->pageget = mp->pagenew = mp->pageput = mp->pageread = 8350982Sbostic mp->pagewrite = 0; 8450982Sbostic #endif 8550982Sbostic return (mp); 8650982Sbostic } 8750982Sbostic 8850982Sbostic /* 8950982Sbostic * MPOOL_FILTER -- initialize input/output filters. 9050982Sbostic * 9150982Sbostic * Parameters: 9250982Sbostic * pgin: Page in conversion routine. 9350982Sbostic * pgout: Page out conversion routine. 9450982Sbostic * pgcookie: Cookie for page in/out routines. 9550982Sbostic */ 9650982Sbostic void 9750982Sbostic mpool_filter(mp, pgin, pgout, pgcookie) 9850982Sbostic MPOOL *mp; 9950982Sbostic void (*pgin) __P((void *, pgno_t, void *)); 10050982Sbostic void (*pgout) __P((void *, pgno_t, void *)); 10150982Sbostic void *pgcookie; 10250982Sbostic { 10350982Sbostic mp->pgin = pgin; 10450982Sbostic mp->pgout = pgout; 10550982Sbostic mp->pgcookie = pgcookie; 10650982Sbostic } 10750982Sbostic 10850982Sbostic /* 10950982Sbostic * MPOOL_NEW -- get a new page 11050982Sbostic * 11150982Sbostic * Parameters: 11250982Sbostic * mp: mpool cookie 11350982Sbostic * pgnoadddr: place to store new page number 11450982Sbostic * Returns: 11550982Sbostic * RET_ERROR, RET_SUCCESS 11650982Sbostic */ 11750982Sbostic void * 11850982Sbostic mpool_new(mp, pgnoaddr) 11950982Sbostic MPOOL *mp; 12050982Sbostic pgno_t *pgnoaddr; 12150982Sbostic { 12250982Sbostic BKT *b; 12350982Sbostic BKTHDR *hp; 12450982Sbostic 12550982Sbostic #ifdef STATISTICS 12650982Sbostic ++mp->pagenew; 12750982Sbostic #endif 12850982Sbostic /* 12950982Sbostic * Get a BKT from the cache. Assign a new page number, attach it to 13050982Sbostic * the hash and lru chains and return. 13150982Sbostic */ 13250982Sbostic if ((b = mpool_bkt(mp)) == NULL) 13350982Sbostic return (NULL); 13450982Sbostic *pgnoaddr = b->pgno = mp->npages++; 13550982Sbostic b->flags = MPOOL_PINNED; 13650982Sbostic inshash(b, b->pgno); 13750982Sbostic inschain(b, &mp->lru); 13850982Sbostic return (b->page); 13950982Sbostic } 14050982Sbostic 14150982Sbostic /* 14250982Sbostic * MPOOL_GET -- get a page from the pool 14350982Sbostic * 14450982Sbostic * Parameters: 14550982Sbostic * mp: mpool cookie 14650982Sbostic * pgno: page number 14750982Sbostic * flags: not used 14850982Sbostic * 14950982Sbostic * Returns: 15050982Sbostic * RET_ERROR, RET_SUCCESS 15150982Sbostic */ 15250982Sbostic void * 15350982Sbostic mpool_get(mp, pgno, flags) 15450982Sbostic MPOOL *mp; 15550982Sbostic pgno_t pgno; 15650982Sbostic u_int flags; /* XXX not used? */ 15750982Sbostic { 15850982Sbostic BKT *b; 15950982Sbostic BKTHDR *hp; 16050982Sbostic off_t off; 16150982Sbostic int nr; 16250982Sbostic 16350982Sbostic /* 16450982Sbostic * If asking for a specific page that is already in the cache, find 16550982Sbostic * it and return it. 16650982Sbostic */ 16750982Sbostic if (b = mpool_look(mp, pgno)) { 16850982Sbostic #ifdef STATISTICS 16950982Sbostic ++mp->pageget; 17050982Sbostic #endif 17150982Sbostic #ifdef DEBUG 17250982Sbostic if (b->flags & MPOOL_PINNED) 17350982Sbostic err("mpool_get: page %d already pinned", b->pgno); 17450982Sbostic #endif 17550982Sbostic rmchain(b); 17650982Sbostic inschain(b, &mp->lru); 17750982Sbostic b->flags |= MPOOL_PINNED; 17850982Sbostic return (b->page); 17950982Sbostic } 18050982Sbostic 18150982Sbostic /* Not allowed to retrieve a non-existent page. */ 18250982Sbostic if (pgno >= mp->npages) { 18350982Sbostic errno = EINVAL; 18450982Sbostic return (NULL); 18550982Sbostic } 18650982Sbostic 18750982Sbostic /* Get a page from the cache. */ 18850982Sbostic if ((b = mpool_bkt(mp)) == NULL) 18950982Sbostic return (NULL); 19050982Sbostic b->pgno = pgno; 19150982Sbostic b->flags = MPOOL_PINNED; 19250982Sbostic 19350982Sbostic #ifdef STATISTICS 19450982Sbostic ++mp->pageread; 19550982Sbostic #endif 19650982Sbostic /* Read in the contents. */ 19750982Sbostic off = mp->pagesize * pgno; 19850982Sbostic if (lseek(mp->fd, off, SEEK_SET) != off) 19950982Sbostic return (NULL); 20050982Sbostic if ((nr = read(mp->fd, b->page, mp->pagesize)) != mp->pagesize) { 20150982Sbostic if (nr >= 0) 20250982Sbostic errno = EFTYPE; 20350982Sbostic return (NULL); 20450982Sbostic } 20550982Sbostic if (mp->pgin) 20650982Sbostic (mp->pgin)(mp->pgcookie, b->pgno, b->page); 20750982Sbostic 20850982Sbostic inshash(b, b->pgno); 20950982Sbostic inschain(b, &mp->lru); 21050982Sbostic #ifdef STATISTICS 21150982Sbostic ++mp->pageget; 21250982Sbostic #endif 21350982Sbostic return (b->page); 21450982Sbostic } 21550982Sbostic 21650982Sbostic /* 21750982Sbostic * MPOOL_PUT -- return a page to the pool 21850982Sbostic * 21950982Sbostic * Parameters: 22050982Sbostic * mp: mpool cookie 22150982Sbostic * page: page pointer 22250982Sbostic * pgno: page number 22350982Sbostic * 22450982Sbostic * Returns: 22550982Sbostic * RET_ERROR, RET_SUCCESS 22650982Sbostic */ 22750982Sbostic int 22850982Sbostic mpool_put(mp, page, flags) 22950982Sbostic MPOOL *mp; 23050982Sbostic void *page; 23150982Sbostic u_int flags; 23250982Sbostic { 23350982Sbostic BKT *baddr; 23450982Sbostic #ifdef DEBUG 23550982Sbostic BKT *b; 23650982Sbostic #endif 23750982Sbostic 23850982Sbostic #ifdef STATISTICS 23950982Sbostic ++mp->pageput; 24050982Sbostic #endif 24151766Sbostic baddr = (BKT *)((char *)page - sizeof(BKT)); 24250982Sbostic #ifdef DEBUG 24350982Sbostic if (!(baddr->flags & MPOOL_PINNED)) 24450982Sbostic err("mpool_put: page %d not pinned", b->pgno); 24550982Sbostic for (b = mp->lru.cnext; b != (BKT *)&mp->lru; b = b->cnext) { 24650982Sbostic if (b == (BKT *)&mp->lru) 24750982Sbostic err("mpool_put: %0x: bad address", baddr); 24850982Sbostic if (b == baddr) 24950982Sbostic break; 25050982Sbostic } 25150982Sbostic #endif 25250982Sbostic baddr->flags &= ~MPOOL_PINNED; 25350982Sbostic baddr->flags |= flags & MPOOL_DIRTY; 25450982Sbostic return (RET_SUCCESS); 25550982Sbostic } 25650982Sbostic 25750982Sbostic /* 25850982Sbostic * MPOOL_CLOSE -- close the buffer pool 25950982Sbostic * 26050982Sbostic * Parameters: 26150982Sbostic * mp: mpool cookie 26250982Sbostic * 26350982Sbostic * Returns: 26450982Sbostic * RET_ERROR, RET_SUCCESS 26550982Sbostic */ 26650982Sbostic int 26750982Sbostic mpool_close(mp) 26850982Sbostic MPOOL *mp; 26950982Sbostic { 27050982Sbostic BKT *b, *next; 27150982Sbostic 27250982Sbostic /* Free up any space allocated to the lru pages. */ 27350982Sbostic for (b = mp->lru.cprev; b != (BKT *)&mp->lru; b = next) { 27450982Sbostic next = b->cprev; 27550982Sbostic free(b); 27650982Sbostic } 27751802Sbostic free(mp); 27850982Sbostic return (RET_SUCCESS); 27950982Sbostic } 28050982Sbostic 28150982Sbostic /* 28250982Sbostic * MPOOL_SYNC -- sync the file to disk. 28350982Sbostic * 28450982Sbostic * Parameters: 28550982Sbostic * mp: mpool cookie 28650982Sbostic * 28750982Sbostic * Returns: 28850982Sbostic * RET_ERROR, RET_SUCCESS 28950982Sbostic */ 29050982Sbostic int 29150982Sbostic mpool_sync(mp) 29250982Sbostic MPOOL *mp; 29350982Sbostic { 29450982Sbostic BKT *b; 29550982Sbostic 29650982Sbostic for (b = mp->lru.cprev; b != (BKT *)&mp->lru; b = b->cprev) 29750982Sbostic if (b->flags & MPOOL_DIRTY && mpool_write(mp, b) == RET_ERROR) 29850982Sbostic return (RET_ERROR); 29950982Sbostic return (fsync(mp->fd) ? RET_ERROR : RET_SUCCESS); 30050982Sbostic } 30150982Sbostic 30250982Sbostic /* 30350982Sbostic * MPOOL_BKT -- get/create a BKT from the cache 30450982Sbostic * 30550982Sbostic * Parameters: 30650982Sbostic * mp: mpool cookie 30750982Sbostic * 30850982Sbostic * Returns: 30950982Sbostic * NULL on failure and a pointer to the BKT on success 31050982Sbostic */ 31150982Sbostic static BKT * 31250982Sbostic mpool_bkt(mp) 31350982Sbostic MPOOL *mp; 31450982Sbostic { 31550982Sbostic BKT *b; 31650982Sbostic 31750982Sbostic if (mp->curcache < mp->maxcache) 31850982Sbostic goto new; 31950982Sbostic 32050982Sbostic /* 32150982Sbostic * If the cache is maxxed out, search the lru list for a buffer we 32250982Sbostic * can flush. If we find one, write it if necessary and take it off 32350982Sbostic * any lists. If we don't find anything we grow the cache anyway. 32450982Sbostic * The cache never shrinks. 32550982Sbostic */ 32650982Sbostic for (b = mp->lru.cprev; b != (BKT *)&mp->lru; b = b->cprev) 32750982Sbostic if (!(b->flags & MPOOL_PINNED)) { 32850982Sbostic if (b->flags & MPOOL_DIRTY && 32950982Sbostic mpool_write(mp, b) == RET_ERROR) 33050982Sbostic return (NULL); 33150982Sbostic rmhash(b); 33250982Sbostic rmchain(b); 33350982Sbostic #ifdef STATISTICS 33450982Sbostic ++mp->pageflush; 33550982Sbostic #endif 33650982Sbostic #ifdef DEBUG 33750982Sbostic { 33850982Sbostic void *spage; 33950982Sbostic spage = b->page; 34050982Sbostic memset(b, 0xff, sizeof(BKT) + mp->pagesize); 34150982Sbostic b->page = spage; 34250982Sbostic } 34350982Sbostic #endif 34450982Sbostic return (b); 34550982Sbostic } 34650982Sbostic 34750982Sbostic new: if ((b = malloc(sizeof(BKT) + mp->pagesize)) == NULL) 34850982Sbostic return (NULL); 34950982Sbostic #ifdef STATISTICS 35050982Sbostic ++mp->pagealloc; 35150982Sbostic #endif 35250982Sbostic #ifdef DEBUG 35350982Sbostic memset(b, 0xff, sizeof(BKT) + mp->pagesize); 35450982Sbostic #endif 35550982Sbostic b->page = (char *)b + sizeof(BKT); 35650982Sbostic ++mp->curcache; 35750982Sbostic return (b); 35850982Sbostic } 35950982Sbostic 36050982Sbostic /* 36150982Sbostic * MPOOL_WRITE -- sync a page to disk 36250982Sbostic * 36350982Sbostic * Parameters: 36450982Sbostic * mp: mpool cookie 36550982Sbostic * 36650982Sbostic * Returns: 36750982Sbostic * RET_ERROR, RET_SUCCESS 36850982Sbostic */ 36950982Sbostic static int 37050982Sbostic mpool_write(mp, b) 37150982Sbostic MPOOL *mp; 37250982Sbostic BKT *b; 37350982Sbostic { 37450982Sbostic off_t off; 37550982Sbostic 37650982Sbostic if (mp->pgout) 37750982Sbostic (mp->pgout)(mp->pgcookie, b->pgno, b->page); 37850982Sbostic 37950982Sbostic #ifdef STATISTICS 38050982Sbostic ++mp->pagewrite; 38150982Sbostic #endif 38250982Sbostic off = mp->pagesize * b->pgno; 38350982Sbostic if (lseek(mp->fd, off, SEEK_SET) != off) 38450982Sbostic return (RET_ERROR); 38550982Sbostic if (write(mp->fd, b->page, mp->pagesize) != mp->pagesize) 38650982Sbostic return (RET_ERROR); 38750982Sbostic b->flags &= ~MPOOL_DIRTY; 38850982Sbostic return (RET_SUCCESS); 38950982Sbostic } 39050982Sbostic 39150982Sbostic /* 39250982Sbostic * MPOOL_LOOK -- lookup a page 39350982Sbostic * 39450982Sbostic * Parameters: 39550982Sbostic * mp: mpool cookie 39650982Sbostic * pgno: page number 39750982Sbostic * 39850982Sbostic * Returns: 39950982Sbostic * NULL on failure and a pointer to the BKT on success 40050982Sbostic */ 40150982Sbostic static BKT * 40250982Sbostic mpool_look(mp, pgno) 40350982Sbostic MPOOL *mp; 40450982Sbostic pgno_t pgno; 40550982Sbostic { 40650982Sbostic register BKT *b; 40750982Sbostic register BKTHDR *tb; 40850982Sbostic 40950982Sbostic /* XXX 41050982Sbostic * If find the buffer, put it first on the hash chain so can 41150982Sbostic * find it again quickly. 41250982Sbostic */ 41350982Sbostic tb = &mp->hashtable[HASHKEY(pgno)]; 41450982Sbostic for (b = tb->hnext; b != (BKT *)tb; b = b->hnext) 41550982Sbostic if (b->pgno == pgno) { 41650982Sbostic #ifdef STATISTICS 41750982Sbostic ++mp->cachehit; 41850982Sbostic #endif 41950982Sbostic return (b); 42050982Sbostic } 42150982Sbostic #ifdef STATISTICS 42250982Sbostic ++mp->cachemiss; 42350982Sbostic #endif 42450982Sbostic return (NULL); 42550982Sbostic } 42650982Sbostic 42750982Sbostic #ifdef STATISTICS 42850982Sbostic /* 42950982Sbostic * MPOOL_STAT -- cache statistics 43050982Sbostic * 43150982Sbostic * Parameters: 43250982Sbostic * mp: mpool cookie 43350982Sbostic */ 43450982Sbostic void 43550982Sbostic mpool_stat(mp) 43650982Sbostic MPOOL *mp; 43750982Sbostic { 43850982Sbostic BKT *b; 43950982Sbostic int cnt; 44050982Sbostic char *sep; 44150982Sbostic 44250982Sbostic (void)fprintf(stderr, "%lu pages in the file\n", mp->npages); 44350982Sbostic (void)fprintf(stderr, 44450982Sbostic "page size %lu, cacheing %lu pages of %lu page max cache\n", 44550982Sbostic mp->pagesize, mp->curcache, mp->maxcache); 44650982Sbostic (void)fprintf(stderr, "%lu page puts, %lu page gets, %lu page new\n", 44750982Sbostic mp->pageput, mp->pageget, mp->pagenew); 44850982Sbostic (void)fprintf(stderr, "%lu page allocs, %lu page flushes\n", 44950982Sbostic mp->pagealloc, mp->pageflush); 45050982Sbostic if (mp->cachehit + mp->cachemiss) 45150982Sbostic (void)fprintf(stderr, 45250982Sbostic "%.0f%% cache hit rate (%lu hits, %lu misses)\n", 45350982Sbostic ((double)mp->cachehit / (mp->cachehit + mp->cachemiss)) 45450982Sbostic * 100, mp->cachehit, mp->cachemiss); 45550982Sbostic (void)fprintf(stderr, "%lu page reads, %lu page writes\n", 45650982Sbostic mp->pageread, mp->pagewrite); 45750982Sbostic 45850982Sbostic sep = ""; 45950982Sbostic cnt = 0; 46050982Sbostic for (b = mp->lru.cnext; b != (BKT *)&mp->lru; b = b->cnext) { 46150982Sbostic (void)fprintf(stderr, "%s%d", sep, b->pgno); 46250982Sbostic if (b->flags & MPOOL_DIRTY) 46350982Sbostic (void)fprintf(stderr, "d"); 46450982Sbostic if (b->flags & MPOOL_PINNED) 46550982Sbostic (void)fprintf(stderr, "P"); 46650982Sbostic if (++cnt == 10) { 46750982Sbostic sep = "\n"; 46850982Sbostic cnt = 0; 46950982Sbostic } else 47050982Sbostic sep = ", "; 47150982Sbostic 47250982Sbostic } 47350982Sbostic (void)fprintf(stderr, "\n"); 47450982Sbostic } 47550982Sbostic #endif 47650982Sbostic 47750982Sbostic #ifdef DEBUG 47850982Sbostic #if __STDC__ 47950982Sbostic #include <stdarg.h> 48050982Sbostic #else 48150982Sbostic #include <varargs.h> 48250982Sbostic #endif 48350982Sbostic 48450982Sbostic static void 48550982Sbostic #if __STDC__ 48650982Sbostic err(const char *fmt, ...) 48750982Sbostic #else 48850982Sbostic err(fmt, va_alist) 48950982Sbostic char *fmt; 49050982Sbostic va_dcl 49150982Sbostic #endif 49250982Sbostic { 49350982Sbostic va_list ap; 49450982Sbostic #if __STDC__ 49550982Sbostic va_start(ap, fmt); 49650982Sbostic #else 49750982Sbostic va_start(ap); 49850982Sbostic #endif 49950982Sbostic (void)vfprintf(stderr, fmt, ap); 50050982Sbostic va_end(ap); 50150982Sbostic (void)fprintf(stderr, "\n"); 50250982Sbostic abort(); 50350982Sbostic /* NOTREACHED */ 50450982Sbostic } 50550982Sbostic #endif 506