150982Sbostic /*- 250982Sbostic * Copyright (c) 1990 The Regents of the University of California. 350982Sbostic * All rights reserved. 450982Sbostic * 550982Sbostic * %sccs.include.redist.c% 650982Sbostic */ 750982Sbostic 850982Sbostic #if defined(LIBC_SCCS) && !defined(lint) 9*58072Sbostic static char sccsid[] = "@(#)mpool.c 5.5 (Berkeley) 02/19/93"; 1050982Sbostic #endif /* LIBC_SCCS and not lint */ 1150982Sbostic 1250982Sbostic #include <sys/param.h> 1350982Sbostic #include <sys/stat.h> 1457933Sbostic 1550982Sbostic #include <errno.h> 1650982Sbostic #include <stdio.h> 1750982Sbostic #include <stdlib.h> 1850982Sbostic #include <string.h> 1957933Sbostic #include <unistd.h> 2057933Sbostic 2157933Sbostic #include <db.h> 2250982Sbostic #define __MPOOLINTERFACE_PRIVATE 2350982Sbostic #include "mpool.h" 2450982Sbostic 2550982Sbostic static BKT *mpool_bkt __P((MPOOL *)); 2650982Sbostic static BKT *mpool_look __P((MPOOL *, pgno_t)); 2750982Sbostic static int mpool_write __P((MPOOL *, BKT *)); 2850982Sbostic #ifdef DEBUG 2950982Sbostic static void err __P((const char *fmt, ...)); 3050982Sbostic #endif 3150982Sbostic 3250982Sbostic /* 3350982Sbostic * MPOOL_OPEN -- initialize a memory pool. 3450982Sbostic * 3550982Sbostic * Parameters: 3650982Sbostic * key: Shared buffer key. 3750982Sbostic * fd: File descriptor. 3850982Sbostic * pagesize: File page size. 3950982Sbostic * maxcache: Max number of cached pages. 4050982Sbostic * 4150982Sbostic * Returns: 4250982Sbostic * MPOOL pointer, NULL on error. 4350982Sbostic */ 4450982Sbostic MPOOL * 4550982Sbostic mpool_open(key, fd, pagesize, maxcache) 4650982Sbostic DBT *key; 4750982Sbostic int fd; 4850982Sbostic pgno_t pagesize, maxcache; 4950982Sbostic { 5050982Sbostic struct stat sb; 5150982Sbostic MPOOL *mp; 5250982Sbostic int entry; 5350982Sbostic 5450982Sbostic if (fstat(fd, &sb)) 5550982Sbostic return (NULL); 5650982Sbostic /* XXX 5750982Sbostic * We should only set st_size to 0 for pipes -- 4.4BSD has the fix so 5850982Sbostic * that stat(2) returns true for ISSOCK on pipes. Until then, this is 5950982Sbostic * fairly close. 6050982Sbostic */ 6150982Sbostic if (!S_ISREG(sb.st_mode)) { 6250982Sbostic errno = ESPIPE; 6350982Sbostic return (NULL); 6450982Sbostic } 6550982Sbostic 6650982Sbostic if ((mp = malloc(sizeof(MPOOL))) == NULL) 6750982Sbostic return (NULL); 6850982Sbostic mp->free.cnext = mp->free.cprev = (BKT *)&mp->free; 6950982Sbostic mp->lru.cnext = mp->lru.cprev = (BKT *)&mp->lru; 7050982Sbostic for (entry = 0; entry < HASHSIZE; ++entry) 7150982Sbostic mp->hashtable[entry].hnext = mp->hashtable[entry].hprev = 7250982Sbostic mp->hashtable[entry].cnext = mp->hashtable[entry].cprev = 7350982Sbostic (BKT *)&mp->hashtable[entry]; 7450982Sbostic mp->curcache = 0; 7550982Sbostic mp->maxcache = maxcache; 7650982Sbostic mp->pagesize = pagesize; 7750982Sbostic mp->npages = sb.st_size / pagesize; 7850982Sbostic mp->fd = fd; 79*58072Sbostic mp->pgcookie = NULL; 80*58072Sbostic mp->pgin = mp->pgout = NULL; 8150982Sbostic 8250982Sbostic #ifdef STATISTICS 8350982Sbostic mp->cachehit = mp->cachemiss = mp->pagealloc = mp->pageflush = 8450982Sbostic mp->pageget = mp->pagenew = mp->pageput = mp->pageread = 8550982Sbostic mp->pagewrite = 0; 8650982Sbostic #endif 8750982Sbostic return (mp); 8850982Sbostic } 8950982Sbostic 9050982Sbostic /* 9150982Sbostic * MPOOL_FILTER -- initialize input/output filters. 9250982Sbostic * 9350982Sbostic * Parameters: 9450982Sbostic * pgin: Page in conversion routine. 9550982Sbostic * pgout: Page out conversion routine. 9650982Sbostic * pgcookie: Cookie for page in/out routines. 9750982Sbostic */ 9850982Sbostic void 9950982Sbostic mpool_filter(mp, pgin, pgout, pgcookie) 10050982Sbostic MPOOL *mp; 10150982Sbostic void (*pgin) __P((void *, pgno_t, void *)); 10250982Sbostic void (*pgout) __P((void *, pgno_t, void *)); 10350982Sbostic void *pgcookie; 10450982Sbostic { 10550982Sbostic mp->pgin = pgin; 10650982Sbostic mp->pgout = pgout; 10750982Sbostic mp->pgcookie = pgcookie; 10850982Sbostic } 10950982Sbostic 11050982Sbostic /* 11150982Sbostic * MPOOL_NEW -- get a new page 11250982Sbostic * 11350982Sbostic * Parameters: 11450982Sbostic * mp: mpool cookie 11550982Sbostic * pgnoadddr: place to store new page number 11650982Sbostic * Returns: 11750982Sbostic * RET_ERROR, RET_SUCCESS 11850982Sbostic */ 11950982Sbostic void * 12050982Sbostic mpool_new(mp, pgnoaddr) 12150982Sbostic MPOOL *mp; 12250982Sbostic pgno_t *pgnoaddr; 12350982Sbostic { 12450982Sbostic BKT *b; 12550982Sbostic BKTHDR *hp; 12650982Sbostic 12750982Sbostic #ifdef STATISTICS 12850982Sbostic ++mp->pagenew; 12950982Sbostic #endif 13050982Sbostic /* 13150982Sbostic * Get a BKT from the cache. Assign a new page number, attach it to 13250982Sbostic * the hash and lru chains and return. 13350982Sbostic */ 13450982Sbostic if ((b = mpool_bkt(mp)) == NULL) 13550982Sbostic return (NULL); 13650982Sbostic *pgnoaddr = b->pgno = mp->npages++; 13750982Sbostic b->flags = MPOOL_PINNED; 13850982Sbostic inshash(b, b->pgno); 13950982Sbostic inschain(b, &mp->lru); 14050982Sbostic return (b->page); 14150982Sbostic } 14250982Sbostic 14350982Sbostic /* 14450982Sbostic * MPOOL_GET -- get a page from the pool 14550982Sbostic * 14650982Sbostic * Parameters: 14750982Sbostic * mp: mpool cookie 14850982Sbostic * pgno: page number 14950982Sbostic * flags: not used 15050982Sbostic * 15150982Sbostic * Returns: 15250982Sbostic * RET_ERROR, RET_SUCCESS 15350982Sbostic */ 15450982Sbostic void * 15550982Sbostic mpool_get(mp, pgno, flags) 15650982Sbostic MPOOL *mp; 15750982Sbostic pgno_t pgno; 15850982Sbostic u_int flags; /* XXX not used? */ 15950982Sbostic { 16050982Sbostic BKT *b; 16150982Sbostic BKTHDR *hp; 16250982Sbostic off_t off; 16350982Sbostic int nr; 16450982Sbostic 16550982Sbostic /* 16650982Sbostic * If asking for a specific page that is already in the cache, find 16750982Sbostic * it and return it. 16850982Sbostic */ 16950982Sbostic if (b = mpool_look(mp, pgno)) { 17050982Sbostic #ifdef STATISTICS 17150982Sbostic ++mp->pageget; 17250982Sbostic #endif 17350982Sbostic #ifdef DEBUG 17450982Sbostic if (b->flags & MPOOL_PINNED) 17550982Sbostic err("mpool_get: page %d already pinned", b->pgno); 17650982Sbostic #endif 17750982Sbostic rmchain(b); 17850982Sbostic inschain(b, &mp->lru); 17950982Sbostic b->flags |= MPOOL_PINNED; 18050982Sbostic return (b->page); 18150982Sbostic } 18250982Sbostic 18350982Sbostic /* Not allowed to retrieve a non-existent page. */ 18450982Sbostic if (pgno >= mp->npages) { 18550982Sbostic errno = EINVAL; 18650982Sbostic return (NULL); 18750982Sbostic } 18850982Sbostic 18950982Sbostic /* Get a page from the cache. */ 19050982Sbostic if ((b = mpool_bkt(mp)) == NULL) 19150982Sbostic return (NULL); 19250982Sbostic b->pgno = pgno; 19350982Sbostic b->flags = MPOOL_PINNED; 19450982Sbostic 19550982Sbostic #ifdef STATISTICS 19650982Sbostic ++mp->pageread; 19750982Sbostic #endif 19850982Sbostic /* Read in the contents. */ 19950982Sbostic off = mp->pagesize * pgno; 20050982Sbostic if (lseek(mp->fd, off, SEEK_SET) != off) 20150982Sbostic return (NULL); 20250982Sbostic if ((nr = read(mp->fd, b->page, mp->pagesize)) != mp->pagesize) { 20350982Sbostic if (nr >= 0) 20450982Sbostic errno = EFTYPE; 20550982Sbostic return (NULL); 20650982Sbostic } 20750982Sbostic if (mp->pgin) 20850982Sbostic (mp->pgin)(mp->pgcookie, b->pgno, b->page); 20950982Sbostic 21050982Sbostic inshash(b, b->pgno); 21150982Sbostic inschain(b, &mp->lru); 21250982Sbostic #ifdef STATISTICS 21350982Sbostic ++mp->pageget; 21450982Sbostic #endif 21550982Sbostic return (b->page); 21650982Sbostic } 21750982Sbostic 21850982Sbostic /* 21950982Sbostic * MPOOL_PUT -- return a page to the pool 22050982Sbostic * 22150982Sbostic * Parameters: 22250982Sbostic * mp: mpool cookie 22350982Sbostic * page: page pointer 22450982Sbostic * pgno: page number 22550982Sbostic * 22650982Sbostic * Returns: 22750982Sbostic * RET_ERROR, RET_SUCCESS 22850982Sbostic */ 22950982Sbostic int 23050982Sbostic mpool_put(mp, page, flags) 23150982Sbostic MPOOL *mp; 23250982Sbostic void *page; 23350982Sbostic u_int flags; 23450982Sbostic { 23550982Sbostic BKT *baddr; 23650982Sbostic #ifdef DEBUG 23750982Sbostic BKT *b; 23850982Sbostic #endif 23950982Sbostic 24050982Sbostic #ifdef STATISTICS 24150982Sbostic ++mp->pageput; 24250982Sbostic #endif 24351766Sbostic baddr = (BKT *)((char *)page - sizeof(BKT)); 24450982Sbostic #ifdef DEBUG 24550982Sbostic if (!(baddr->flags & MPOOL_PINNED)) 24650982Sbostic err("mpool_put: page %d not pinned", b->pgno); 24750982Sbostic for (b = mp->lru.cnext; b != (BKT *)&mp->lru; b = b->cnext) { 24850982Sbostic if (b == (BKT *)&mp->lru) 24950982Sbostic err("mpool_put: %0x: bad address", baddr); 25050982Sbostic if (b == baddr) 25150982Sbostic break; 25250982Sbostic } 25350982Sbostic #endif 25450982Sbostic baddr->flags &= ~MPOOL_PINNED; 25550982Sbostic baddr->flags |= flags & MPOOL_DIRTY; 25650982Sbostic return (RET_SUCCESS); 25750982Sbostic } 25850982Sbostic 25950982Sbostic /* 26050982Sbostic * MPOOL_CLOSE -- close the buffer pool 26150982Sbostic * 26250982Sbostic * Parameters: 26350982Sbostic * mp: mpool cookie 26450982Sbostic * 26550982Sbostic * Returns: 26650982Sbostic * RET_ERROR, RET_SUCCESS 26750982Sbostic */ 26850982Sbostic int 26950982Sbostic mpool_close(mp) 27050982Sbostic MPOOL *mp; 27150982Sbostic { 27250982Sbostic BKT *b, *next; 27350982Sbostic 27450982Sbostic /* Free up any space allocated to the lru pages. */ 27550982Sbostic for (b = mp->lru.cprev; b != (BKT *)&mp->lru; b = next) { 27650982Sbostic next = b->cprev; 27750982Sbostic free(b); 27850982Sbostic } 27951802Sbostic free(mp); 28050982Sbostic return (RET_SUCCESS); 28150982Sbostic } 28250982Sbostic 28350982Sbostic /* 28450982Sbostic * MPOOL_SYNC -- sync the file to disk. 28550982Sbostic * 28650982Sbostic * Parameters: 28750982Sbostic * mp: mpool cookie 28850982Sbostic * 28950982Sbostic * Returns: 29050982Sbostic * RET_ERROR, RET_SUCCESS 29150982Sbostic */ 29250982Sbostic int 29350982Sbostic mpool_sync(mp) 29450982Sbostic MPOOL *mp; 29550982Sbostic { 29650982Sbostic BKT *b; 29750982Sbostic 29850982Sbostic for (b = mp->lru.cprev; b != (BKT *)&mp->lru; b = b->cprev) 29950982Sbostic if (b->flags & MPOOL_DIRTY && mpool_write(mp, b) == RET_ERROR) 30050982Sbostic return (RET_ERROR); 30150982Sbostic return (fsync(mp->fd) ? RET_ERROR : RET_SUCCESS); 30250982Sbostic } 30350982Sbostic 30450982Sbostic /* 30550982Sbostic * MPOOL_BKT -- get/create a BKT from the cache 30650982Sbostic * 30750982Sbostic * Parameters: 30850982Sbostic * mp: mpool cookie 30950982Sbostic * 31050982Sbostic * Returns: 31150982Sbostic * NULL on failure and a pointer to the BKT on success 31250982Sbostic */ 31350982Sbostic static BKT * 31450982Sbostic mpool_bkt(mp) 31550982Sbostic MPOOL *mp; 31650982Sbostic { 31750982Sbostic BKT *b; 31850982Sbostic 31950982Sbostic if (mp->curcache < mp->maxcache) 32050982Sbostic goto new; 32150982Sbostic 32250982Sbostic /* 32350982Sbostic * If the cache is maxxed out, search the lru list for a buffer we 32450982Sbostic * can flush. If we find one, write it if necessary and take it off 32550982Sbostic * any lists. If we don't find anything we grow the cache anyway. 32650982Sbostic * The cache never shrinks. 32750982Sbostic */ 32850982Sbostic for (b = mp->lru.cprev; b != (BKT *)&mp->lru; b = b->cprev) 32950982Sbostic if (!(b->flags & MPOOL_PINNED)) { 33050982Sbostic if (b->flags & MPOOL_DIRTY && 33150982Sbostic mpool_write(mp, b) == RET_ERROR) 33250982Sbostic return (NULL); 33350982Sbostic rmhash(b); 33450982Sbostic rmchain(b); 33550982Sbostic #ifdef STATISTICS 33650982Sbostic ++mp->pageflush; 33750982Sbostic #endif 33850982Sbostic #ifdef DEBUG 33950982Sbostic { 34050982Sbostic void *spage; 34150982Sbostic spage = b->page; 34250982Sbostic memset(b, 0xff, sizeof(BKT) + mp->pagesize); 34350982Sbostic b->page = spage; 34450982Sbostic } 34550982Sbostic #endif 34650982Sbostic return (b); 34750982Sbostic } 34850982Sbostic 34950982Sbostic new: if ((b = malloc(sizeof(BKT) + mp->pagesize)) == NULL) 35050982Sbostic return (NULL); 35150982Sbostic #ifdef STATISTICS 35250982Sbostic ++mp->pagealloc; 35350982Sbostic #endif 35450982Sbostic #ifdef DEBUG 35550982Sbostic memset(b, 0xff, sizeof(BKT) + mp->pagesize); 35650982Sbostic #endif 35750982Sbostic b->page = (char *)b + sizeof(BKT); 35850982Sbostic ++mp->curcache; 35950982Sbostic return (b); 36050982Sbostic } 36150982Sbostic 36250982Sbostic /* 36350982Sbostic * MPOOL_WRITE -- sync a page to disk 36450982Sbostic * 36550982Sbostic * Parameters: 36650982Sbostic * mp: mpool cookie 36750982Sbostic * 36850982Sbostic * Returns: 36950982Sbostic * RET_ERROR, RET_SUCCESS 37050982Sbostic */ 37150982Sbostic static int 37250982Sbostic mpool_write(mp, b) 37350982Sbostic MPOOL *mp; 37450982Sbostic BKT *b; 37550982Sbostic { 37650982Sbostic off_t off; 37750982Sbostic 37850982Sbostic if (mp->pgout) 37950982Sbostic (mp->pgout)(mp->pgcookie, b->pgno, b->page); 38050982Sbostic 38150982Sbostic #ifdef STATISTICS 38250982Sbostic ++mp->pagewrite; 38350982Sbostic #endif 38450982Sbostic off = mp->pagesize * b->pgno; 38550982Sbostic if (lseek(mp->fd, off, SEEK_SET) != off) 38650982Sbostic return (RET_ERROR); 38750982Sbostic if (write(mp->fd, b->page, mp->pagesize) != mp->pagesize) 38850982Sbostic return (RET_ERROR); 38950982Sbostic b->flags &= ~MPOOL_DIRTY; 39050982Sbostic return (RET_SUCCESS); 39150982Sbostic } 39250982Sbostic 39350982Sbostic /* 39450982Sbostic * MPOOL_LOOK -- lookup a page 39550982Sbostic * 39650982Sbostic * Parameters: 39750982Sbostic * mp: mpool cookie 39850982Sbostic * pgno: page number 39950982Sbostic * 40050982Sbostic * Returns: 40150982Sbostic * NULL on failure and a pointer to the BKT on success 40250982Sbostic */ 40350982Sbostic static BKT * 40450982Sbostic mpool_look(mp, pgno) 40550982Sbostic MPOOL *mp; 40650982Sbostic pgno_t pgno; 40750982Sbostic { 40850982Sbostic register BKT *b; 40950982Sbostic register BKTHDR *tb; 41050982Sbostic 41150982Sbostic /* XXX 41250982Sbostic * If find the buffer, put it first on the hash chain so can 41350982Sbostic * find it again quickly. 41450982Sbostic */ 41550982Sbostic tb = &mp->hashtable[HASHKEY(pgno)]; 41650982Sbostic for (b = tb->hnext; b != (BKT *)tb; b = b->hnext) 41750982Sbostic if (b->pgno == pgno) { 41850982Sbostic #ifdef STATISTICS 41950982Sbostic ++mp->cachehit; 42050982Sbostic #endif 42150982Sbostic return (b); 42250982Sbostic } 42350982Sbostic #ifdef STATISTICS 42450982Sbostic ++mp->cachemiss; 42550982Sbostic #endif 42650982Sbostic return (NULL); 42750982Sbostic } 42850982Sbostic 42950982Sbostic #ifdef STATISTICS 43050982Sbostic /* 43150982Sbostic * MPOOL_STAT -- cache statistics 43250982Sbostic * 43350982Sbostic * Parameters: 43450982Sbostic * mp: mpool cookie 43550982Sbostic */ 43650982Sbostic void 43750982Sbostic mpool_stat(mp) 43850982Sbostic MPOOL *mp; 43950982Sbostic { 44050982Sbostic BKT *b; 44150982Sbostic int cnt; 44250982Sbostic char *sep; 44350982Sbostic 44450982Sbostic (void)fprintf(stderr, "%lu pages in the file\n", mp->npages); 44550982Sbostic (void)fprintf(stderr, 44650982Sbostic "page size %lu, cacheing %lu pages of %lu page max cache\n", 44750982Sbostic mp->pagesize, mp->curcache, mp->maxcache); 44850982Sbostic (void)fprintf(stderr, "%lu page puts, %lu page gets, %lu page new\n", 44950982Sbostic mp->pageput, mp->pageget, mp->pagenew); 45050982Sbostic (void)fprintf(stderr, "%lu page allocs, %lu page flushes\n", 45150982Sbostic mp->pagealloc, mp->pageflush); 45250982Sbostic if (mp->cachehit + mp->cachemiss) 45350982Sbostic (void)fprintf(stderr, 45450982Sbostic "%.0f%% cache hit rate (%lu hits, %lu misses)\n", 45550982Sbostic ((double)mp->cachehit / (mp->cachehit + mp->cachemiss)) 45650982Sbostic * 100, mp->cachehit, mp->cachemiss); 45750982Sbostic (void)fprintf(stderr, "%lu page reads, %lu page writes\n", 45850982Sbostic mp->pageread, mp->pagewrite); 45950982Sbostic 46050982Sbostic sep = ""; 46150982Sbostic cnt = 0; 46250982Sbostic for (b = mp->lru.cnext; b != (BKT *)&mp->lru; b = b->cnext) { 46350982Sbostic (void)fprintf(stderr, "%s%d", sep, b->pgno); 46450982Sbostic if (b->flags & MPOOL_DIRTY) 46550982Sbostic (void)fprintf(stderr, "d"); 46650982Sbostic if (b->flags & MPOOL_PINNED) 46750982Sbostic (void)fprintf(stderr, "P"); 46850982Sbostic if (++cnt == 10) { 46950982Sbostic sep = "\n"; 47050982Sbostic cnt = 0; 47150982Sbostic } else 47250982Sbostic sep = ", "; 47350982Sbostic 47450982Sbostic } 47550982Sbostic (void)fprintf(stderr, "\n"); 47650982Sbostic } 47750982Sbostic #endif 47850982Sbostic 47950982Sbostic #ifdef DEBUG 48050982Sbostic #if __STDC__ 48150982Sbostic #include <stdarg.h> 48250982Sbostic #else 48350982Sbostic #include <varargs.h> 48450982Sbostic #endif 48550982Sbostic 48650982Sbostic static void 48750982Sbostic #if __STDC__ 48850982Sbostic err(const char *fmt, ...) 48950982Sbostic #else 49050982Sbostic err(fmt, va_alist) 49150982Sbostic char *fmt; 49250982Sbostic va_dcl 49350982Sbostic #endif 49450982Sbostic { 49550982Sbostic va_list ap; 49650982Sbostic #if __STDC__ 49750982Sbostic va_start(ap, fmt); 49850982Sbostic #else 49950982Sbostic va_start(ap); 50050982Sbostic #endif 50150982Sbostic (void)vfprintf(stderr, fmt, ap); 50250982Sbostic va_end(ap); 50350982Sbostic (void)fprintf(stderr, "\n"); 50450982Sbostic abort(); 50550982Sbostic /* NOTREACHED */ 50650982Sbostic } 50750982Sbostic #endif 508