xref: /csrg-svn/lib/libc/db/mpool/mpool.c (revision 58072)
150982Sbostic /*-
250982Sbostic  * Copyright (c) 1990 The Regents of the University of California.
350982Sbostic  * All rights reserved.
450982Sbostic  *
550982Sbostic  * %sccs.include.redist.c%
650982Sbostic  */
750982Sbostic 
850982Sbostic #if defined(LIBC_SCCS) && !defined(lint)
9*58072Sbostic static char sccsid[] = "@(#)mpool.c	5.5 (Berkeley) 02/19/93";
1050982Sbostic #endif /* LIBC_SCCS and not lint */
1150982Sbostic 
1250982Sbostic #include <sys/param.h>
1350982Sbostic #include <sys/stat.h>
1457933Sbostic 
1550982Sbostic #include <errno.h>
1650982Sbostic #include <stdio.h>
1750982Sbostic #include <stdlib.h>
1850982Sbostic #include <string.h>
1957933Sbostic #include <unistd.h>
2057933Sbostic 
2157933Sbostic #include <db.h>
2250982Sbostic #define	__MPOOLINTERFACE_PRIVATE
2350982Sbostic #include "mpool.h"
2450982Sbostic 
2550982Sbostic static BKT *mpool_bkt __P((MPOOL *));
2650982Sbostic static BKT *mpool_look __P((MPOOL *, pgno_t));
2750982Sbostic static int  mpool_write __P((MPOOL *, BKT *));
2850982Sbostic #ifdef DEBUG
2950982Sbostic static void err __P((const char *fmt, ...));
3050982Sbostic #endif
3150982Sbostic 
3250982Sbostic /*
3350982Sbostic  * MPOOL_OPEN -- initialize a memory pool.
3450982Sbostic  *
3550982Sbostic  * Parameters:
3650982Sbostic  *	key:		Shared buffer key.
3750982Sbostic  *	fd:		File descriptor.
3850982Sbostic  *	pagesize:	File page size.
3950982Sbostic  *	maxcache:	Max number of cached pages.
4050982Sbostic  *
4150982Sbostic  * Returns:
4250982Sbostic  *	MPOOL pointer, NULL on error.
4350982Sbostic  */
4450982Sbostic MPOOL *
4550982Sbostic mpool_open(key, fd, pagesize, maxcache)
4650982Sbostic 	DBT *key;
4750982Sbostic 	int fd;
4850982Sbostic 	pgno_t pagesize, maxcache;
4950982Sbostic {
5050982Sbostic 	struct stat sb;
5150982Sbostic 	MPOOL *mp;
5250982Sbostic 	int entry;
5350982Sbostic 
5450982Sbostic 	if (fstat(fd, &sb))
5550982Sbostic 		return (NULL);
5650982Sbostic 	/* XXX
5750982Sbostic 	 * We should only set st_size to 0 for pipes -- 4.4BSD has the fix so
5850982Sbostic 	 * that stat(2) returns true for ISSOCK on pipes.  Until then, this is
5950982Sbostic 	 * fairly close.
6050982Sbostic 	 */
6150982Sbostic 	if (!S_ISREG(sb.st_mode)) {
6250982Sbostic 		errno = ESPIPE;
6350982Sbostic 		return (NULL);
6450982Sbostic 	}
6550982Sbostic 
6650982Sbostic 	if ((mp = malloc(sizeof(MPOOL))) == NULL)
6750982Sbostic 		return (NULL);
6850982Sbostic 	mp->free.cnext = mp->free.cprev = (BKT *)&mp->free;
6950982Sbostic 	mp->lru.cnext = mp->lru.cprev = (BKT *)&mp->lru;
7050982Sbostic 	for (entry = 0; entry < HASHSIZE; ++entry)
7150982Sbostic 		mp->hashtable[entry].hnext = mp->hashtable[entry].hprev =
7250982Sbostic 		    mp->hashtable[entry].cnext = mp->hashtable[entry].cprev =
7350982Sbostic 		    (BKT *)&mp->hashtable[entry];
7450982Sbostic 	mp->curcache = 0;
7550982Sbostic 	mp->maxcache = maxcache;
7650982Sbostic 	mp->pagesize = pagesize;
7750982Sbostic 	mp->npages = sb.st_size / pagesize;
7850982Sbostic 	mp->fd = fd;
79*58072Sbostic 	mp->pgcookie = NULL;
80*58072Sbostic 	mp->pgin = mp->pgout = NULL;
8150982Sbostic 
8250982Sbostic #ifdef STATISTICS
8350982Sbostic 	mp->cachehit = mp->cachemiss = mp->pagealloc = mp->pageflush =
8450982Sbostic 	    mp->pageget = mp->pagenew = mp->pageput = mp->pageread =
8550982Sbostic 	    mp->pagewrite = 0;
8650982Sbostic #endif
8750982Sbostic 	return (mp);
8850982Sbostic }
8950982Sbostic 
9050982Sbostic /*
9150982Sbostic  * MPOOL_FILTER -- initialize input/output filters.
9250982Sbostic  *
9350982Sbostic  * Parameters:
9450982Sbostic  *	pgin:		Page in conversion routine.
9550982Sbostic  *	pgout:		Page out conversion routine.
9650982Sbostic  *	pgcookie:	Cookie for page in/out routines.
9750982Sbostic  */
9850982Sbostic void
9950982Sbostic mpool_filter(mp, pgin, pgout, pgcookie)
10050982Sbostic 	MPOOL *mp;
10150982Sbostic 	void (*pgin) __P((void *, pgno_t, void *));
10250982Sbostic 	void (*pgout) __P((void *, pgno_t, void *));
10350982Sbostic 	void *pgcookie;
10450982Sbostic {
10550982Sbostic 	mp->pgin = pgin;
10650982Sbostic 	mp->pgout = pgout;
10750982Sbostic 	mp->pgcookie = pgcookie;
10850982Sbostic }
10950982Sbostic 
11050982Sbostic /*
11150982Sbostic  * MPOOL_NEW -- get a new page
11250982Sbostic  *
11350982Sbostic  * Parameters:
11450982Sbostic  *	mp:		mpool cookie
11550982Sbostic  *	pgnoadddr:	place to store new page number
11650982Sbostic  * Returns:
11750982Sbostic  *	RET_ERROR, RET_SUCCESS
11850982Sbostic  */
11950982Sbostic void *
12050982Sbostic mpool_new(mp, pgnoaddr)
12150982Sbostic 	MPOOL *mp;
12250982Sbostic 	pgno_t *pgnoaddr;
12350982Sbostic {
12450982Sbostic 	BKT *b;
12550982Sbostic 	BKTHDR *hp;
12650982Sbostic 
12750982Sbostic #ifdef STATISTICS
12850982Sbostic 	++mp->pagenew;
12950982Sbostic #endif
13050982Sbostic 	/*
13150982Sbostic 	 * Get a BKT from the cache.  Assign a new page number, attach it to
13250982Sbostic 	 * the hash and lru chains and return.
13350982Sbostic 	 */
13450982Sbostic 	if ((b = mpool_bkt(mp)) == NULL)
13550982Sbostic 		return (NULL);
13650982Sbostic 	*pgnoaddr = b->pgno = mp->npages++;
13750982Sbostic 	b->flags = MPOOL_PINNED;
13850982Sbostic 	inshash(b, b->pgno);
13950982Sbostic 	inschain(b, &mp->lru);
14050982Sbostic 	return (b->page);
14150982Sbostic }
14250982Sbostic 
14350982Sbostic /*
14450982Sbostic  * MPOOL_GET -- get a page from the pool
14550982Sbostic  *
14650982Sbostic  * Parameters:
14750982Sbostic  *	mp:	mpool cookie
14850982Sbostic  *	pgno:	page number
14950982Sbostic  *	flags:	not used
15050982Sbostic  *
15150982Sbostic  * Returns:
15250982Sbostic  *	RET_ERROR, RET_SUCCESS
15350982Sbostic  */
15450982Sbostic void *
15550982Sbostic mpool_get(mp, pgno, flags)
15650982Sbostic 	MPOOL *mp;
15750982Sbostic 	pgno_t pgno;
15850982Sbostic 	u_int flags;		/* XXX not used? */
15950982Sbostic {
16050982Sbostic 	BKT *b;
16150982Sbostic 	BKTHDR *hp;
16250982Sbostic 	off_t off;
16350982Sbostic 	int nr;
16450982Sbostic 
16550982Sbostic 	/*
16650982Sbostic 	 * If asking for a specific page that is already in the cache, find
16750982Sbostic 	 * it and return it.
16850982Sbostic 	 */
16950982Sbostic 	if (b = mpool_look(mp, pgno)) {
17050982Sbostic #ifdef STATISTICS
17150982Sbostic 		++mp->pageget;
17250982Sbostic #endif
17350982Sbostic #ifdef DEBUG
17450982Sbostic 		if (b->flags & MPOOL_PINNED)
17550982Sbostic 			err("mpool_get: page %d already pinned", b->pgno);
17650982Sbostic #endif
17750982Sbostic 		rmchain(b);
17850982Sbostic 		inschain(b, &mp->lru);
17950982Sbostic 		b->flags |= MPOOL_PINNED;
18050982Sbostic 		return (b->page);
18150982Sbostic 	}
18250982Sbostic 
18350982Sbostic 	/* Not allowed to retrieve a non-existent page. */
18450982Sbostic 	if (pgno >= mp->npages) {
18550982Sbostic 		errno = EINVAL;
18650982Sbostic 		return (NULL);
18750982Sbostic 	}
18850982Sbostic 
18950982Sbostic 	/* Get a page from the cache. */
19050982Sbostic 	if ((b = mpool_bkt(mp)) == NULL)
19150982Sbostic 		return (NULL);
19250982Sbostic 	b->pgno = pgno;
19350982Sbostic 	b->flags = MPOOL_PINNED;
19450982Sbostic 
19550982Sbostic #ifdef STATISTICS
19650982Sbostic 	++mp->pageread;
19750982Sbostic #endif
19850982Sbostic 	/* Read in the contents. */
19950982Sbostic 	off = mp->pagesize * pgno;
20050982Sbostic 	if (lseek(mp->fd, off, SEEK_SET) != off)
20150982Sbostic 		return (NULL);
20250982Sbostic 	if ((nr = read(mp->fd, b->page, mp->pagesize)) != mp->pagesize) {
20350982Sbostic 		if (nr >= 0)
20450982Sbostic 			errno = EFTYPE;
20550982Sbostic 		return (NULL);
20650982Sbostic 	}
20750982Sbostic 	if (mp->pgin)
20850982Sbostic 		(mp->pgin)(mp->pgcookie, b->pgno, b->page);
20950982Sbostic 
21050982Sbostic 	inshash(b, b->pgno);
21150982Sbostic 	inschain(b, &mp->lru);
21250982Sbostic #ifdef STATISTICS
21350982Sbostic 	++mp->pageget;
21450982Sbostic #endif
21550982Sbostic 	return (b->page);
21650982Sbostic }
21750982Sbostic 
21850982Sbostic /*
21950982Sbostic  * MPOOL_PUT -- return a page to the pool
22050982Sbostic  *
22150982Sbostic  * Parameters:
22250982Sbostic  *	mp:	mpool cookie
22350982Sbostic  *	page:	page pointer
22450982Sbostic  *	pgno:	page number
22550982Sbostic  *
22650982Sbostic  * Returns:
22750982Sbostic  *	RET_ERROR, RET_SUCCESS
22850982Sbostic  */
22950982Sbostic int
23050982Sbostic mpool_put(mp, page, flags)
23150982Sbostic 	MPOOL *mp;
23250982Sbostic 	void *page;
23350982Sbostic 	u_int flags;
23450982Sbostic {
23550982Sbostic 	BKT *baddr;
23650982Sbostic #ifdef DEBUG
23750982Sbostic 	BKT *b;
23850982Sbostic #endif
23950982Sbostic 
24050982Sbostic #ifdef STATISTICS
24150982Sbostic 	++mp->pageput;
24250982Sbostic #endif
24351766Sbostic 	baddr = (BKT *)((char *)page - sizeof(BKT));
24450982Sbostic #ifdef DEBUG
24550982Sbostic 	if (!(baddr->flags & MPOOL_PINNED))
24650982Sbostic 		err("mpool_put: page %d not pinned", b->pgno);
24750982Sbostic 	for (b = mp->lru.cnext; b != (BKT *)&mp->lru; b = b->cnext) {
24850982Sbostic 		if (b == (BKT *)&mp->lru)
24950982Sbostic 			err("mpool_put: %0x: bad address", baddr);
25050982Sbostic 		if (b == baddr)
25150982Sbostic 			break;
25250982Sbostic 	}
25350982Sbostic #endif
25450982Sbostic 	baddr->flags &= ~MPOOL_PINNED;
25550982Sbostic 	baddr->flags |= flags & MPOOL_DIRTY;
25650982Sbostic 	return (RET_SUCCESS);
25750982Sbostic }
25850982Sbostic 
25950982Sbostic /*
26050982Sbostic  * MPOOL_CLOSE -- close the buffer pool
26150982Sbostic  *
26250982Sbostic  * Parameters:
26350982Sbostic  *	mp:	mpool cookie
26450982Sbostic  *
26550982Sbostic  * Returns:
26650982Sbostic  *	RET_ERROR, RET_SUCCESS
26750982Sbostic  */
26850982Sbostic int
26950982Sbostic mpool_close(mp)
27050982Sbostic 	MPOOL *mp;
27150982Sbostic {
27250982Sbostic 	BKT *b, *next;
27350982Sbostic 
27450982Sbostic 	/* Free up any space allocated to the lru pages. */
27550982Sbostic 	for (b = mp->lru.cprev; b != (BKT *)&mp->lru; b = next) {
27650982Sbostic 		next = b->cprev;
27750982Sbostic 		free(b);
27850982Sbostic 	}
27951802Sbostic 	free(mp);
28050982Sbostic 	return (RET_SUCCESS);
28150982Sbostic }
28250982Sbostic 
28350982Sbostic /*
28450982Sbostic  * MPOOL_SYNC -- sync the file to disk.
28550982Sbostic  *
28650982Sbostic  * Parameters:
28750982Sbostic  *	mp:	mpool cookie
28850982Sbostic  *
28950982Sbostic  * Returns:
29050982Sbostic  *	RET_ERROR, RET_SUCCESS
29150982Sbostic  */
29250982Sbostic int
29350982Sbostic mpool_sync(mp)
29450982Sbostic 	MPOOL *mp;
29550982Sbostic {
29650982Sbostic 	BKT *b;
29750982Sbostic 
29850982Sbostic 	for (b = mp->lru.cprev; b != (BKT *)&mp->lru; b = b->cprev)
29950982Sbostic 		if (b->flags & MPOOL_DIRTY && mpool_write(mp, b) == RET_ERROR)
30050982Sbostic 			return (RET_ERROR);
30150982Sbostic 	return (fsync(mp->fd) ? RET_ERROR : RET_SUCCESS);
30250982Sbostic }
30350982Sbostic 
30450982Sbostic /*
30550982Sbostic  * MPOOL_BKT -- get/create a BKT from the cache
30650982Sbostic  *
30750982Sbostic  * Parameters:
30850982Sbostic  *	mp:	mpool cookie
30950982Sbostic  *
31050982Sbostic  * Returns:
31150982Sbostic  *	NULL on failure and a pointer to the BKT on success
31250982Sbostic  */
31350982Sbostic static BKT *
31450982Sbostic mpool_bkt(mp)
31550982Sbostic 	MPOOL *mp;
31650982Sbostic {
31750982Sbostic 	BKT *b;
31850982Sbostic 
31950982Sbostic 	if (mp->curcache < mp->maxcache)
32050982Sbostic 		goto new;
32150982Sbostic 
32250982Sbostic 	/*
32350982Sbostic 	 * If the cache is maxxed out, search the lru list for a buffer we
32450982Sbostic 	 * can flush.  If we find one, write it if necessary and take it off
32550982Sbostic 	 * any lists.  If we don't find anything we grow the cache anyway.
32650982Sbostic 	 * The cache never shrinks.
32750982Sbostic 	 */
32850982Sbostic 	for (b = mp->lru.cprev; b != (BKT *)&mp->lru; b = b->cprev)
32950982Sbostic 		if (!(b->flags & MPOOL_PINNED)) {
33050982Sbostic 			if (b->flags & MPOOL_DIRTY &&
33150982Sbostic 			    mpool_write(mp, b) == RET_ERROR)
33250982Sbostic 				return (NULL);
33350982Sbostic 			rmhash(b);
33450982Sbostic 			rmchain(b);
33550982Sbostic #ifdef STATISTICS
33650982Sbostic 			++mp->pageflush;
33750982Sbostic #endif
33850982Sbostic #ifdef DEBUG
33950982Sbostic 			{
34050982Sbostic 				void *spage;
34150982Sbostic 				spage = b->page;
34250982Sbostic 				memset(b, 0xff, sizeof(BKT) + mp->pagesize);
34350982Sbostic 				b->page = spage;
34450982Sbostic 			}
34550982Sbostic #endif
34650982Sbostic 			return (b);
34750982Sbostic 		}
34850982Sbostic 
34950982Sbostic new:	if ((b = malloc(sizeof(BKT) + mp->pagesize)) == NULL)
35050982Sbostic 		return (NULL);
35150982Sbostic #ifdef STATISTICS
35250982Sbostic 	++mp->pagealloc;
35350982Sbostic #endif
35450982Sbostic #ifdef DEBUG
35550982Sbostic 	memset(b, 0xff, sizeof(BKT) + mp->pagesize);
35650982Sbostic #endif
35750982Sbostic 	b->page = (char *)b + sizeof(BKT);
35850982Sbostic 	++mp->curcache;
35950982Sbostic 	return (b);
36050982Sbostic }
36150982Sbostic 
36250982Sbostic /*
36350982Sbostic  * MPOOL_WRITE -- sync a page to disk
36450982Sbostic  *
36550982Sbostic  * Parameters:
36650982Sbostic  *	mp:	mpool cookie
36750982Sbostic  *
36850982Sbostic  * Returns:
36950982Sbostic  *	RET_ERROR, RET_SUCCESS
37050982Sbostic  */
37150982Sbostic static int
37250982Sbostic mpool_write(mp, b)
37350982Sbostic 	MPOOL *mp;
37450982Sbostic 	BKT *b;
37550982Sbostic {
37650982Sbostic 	off_t off;
37750982Sbostic 
37850982Sbostic 	if (mp->pgout)
37950982Sbostic 		(mp->pgout)(mp->pgcookie, b->pgno, b->page);
38050982Sbostic 
38150982Sbostic #ifdef STATISTICS
38250982Sbostic 	++mp->pagewrite;
38350982Sbostic #endif
38450982Sbostic 	off = mp->pagesize * b->pgno;
38550982Sbostic 	if (lseek(mp->fd, off, SEEK_SET) != off)
38650982Sbostic 		return (RET_ERROR);
38750982Sbostic 	if (write(mp->fd, b->page, mp->pagesize) != mp->pagesize)
38850982Sbostic 		return (RET_ERROR);
38950982Sbostic 	b->flags &= ~MPOOL_DIRTY;
39050982Sbostic 	return (RET_SUCCESS);
39150982Sbostic }
39250982Sbostic 
39350982Sbostic /*
39450982Sbostic  * MPOOL_LOOK -- lookup a page
39550982Sbostic  *
39650982Sbostic  * Parameters:
39750982Sbostic  *	mp:	mpool cookie
39850982Sbostic  *	pgno:	page number
39950982Sbostic  *
40050982Sbostic  * Returns:
40150982Sbostic  *	NULL on failure and a pointer to the BKT on success
40250982Sbostic  */
40350982Sbostic static BKT *
40450982Sbostic mpool_look(mp, pgno)
40550982Sbostic 	MPOOL *mp;
40650982Sbostic 	pgno_t pgno;
40750982Sbostic {
40850982Sbostic 	register BKT *b;
40950982Sbostic 	register BKTHDR *tb;
41050982Sbostic 
41150982Sbostic 	/* XXX
41250982Sbostic 	 * If find the buffer, put it first on the hash chain so can
41350982Sbostic 	 * find it again quickly.
41450982Sbostic 	 */
41550982Sbostic 	tb = &mp->hashtable[HASHKEY(pgno)];
41650982Sbostic 	for (b = tb->hnext; b != (BKT *)tb; b = b->hnext)
41750982Sbostic 		if (b->pgno == pgno) {
41850982Sbostic #ifdef STATISTICS
41950982Sbostic 			++mp->cachehit;
42050982Sbostic #endif
42150982Sbostic 			return (b);
42250982Sbostic 		}
42350982Sbostic #ifdef STATISTICS
42450982Sbostic 	++mp->cachemiss;
42550982Sbostic #endif
42650982Sbostic 	return (NULL);
42750982Sbostic }
42850982Sbostic 
#ifdef STATISTICS
/*
 * MPOOL_STAT -- cache statistics
 *
 * Parameters:
 *	mp:	mpool cookie
 *
 * Writes the pool's counters to stderr, followed by the page number of
 * every bucket on the lru chain, ten to a line.  A trailing "d" marks a
 * dirty page and a trailing "P" a pinned one.
 */
void
mpool_stat(mp)
	MPOOL *mp;
{
	BKT *b;
	int cnt;
	char *sep;

	(void)fprintf(stderr, "%lu pages in the file\n", mp->npages);
	(void)fprintf(stderr,
	    "page size %lu, cacheing %lu pages of %lu page max cache\n",
	    mp->pagesize, mp->curcache, mp->maxcache);
	(void)fprintf(stderr, "%lu page puts, %lu page gets, %lu page new\n",
	    mp->pageput, mp->pageget, mp->pagenew);
	(void)fprintf(stderr, "%lu page allocs, %lu page flushes\n",
	    mp->pagealloc, mp->pageflush);
	/* Skip the hit rate when no lookups happened, avoiding 0 / 0. */
	if (mp->cachehit + mp->cachemiss)
		(void)fprintf(stderr,
		    "%.0f%% cache hit rate (%lu hits, %lu misses)\n",
		    ((double)mp->cachehit / (mp->cachehit + mp->cachemiss))
		    * 100, mp->cachehit, mp->cachemiss);
	(void)fprintf(stderr, "%lu page reads, %lu page writes\n",
	    mp->pageread, mp->pagewrite);

	sep = "";
	cnt = 0;
	for (b = mp->lru.cnext; b != (BKT *)&mp->lru; b = b->cnext) {
		/*
		 * Page numbers are unsigned; print them the same way as the
		 * page counts above instead of through a signed int
		 * conversion.
		 */
		(void)fprintf(stderr, "%s%lu", sep, (u_long)b->pgno);
		if (b->flags & MPOOL_DIRTY)
			(void)fprintf(stderr, "d");
		if (b->flags & MPOOL_PINNED)
			(void)fprintf(stderr, "P");
		/* The separator is emitted in front of the next entry. */
		if (++cnt == 10) {
			sep = "\n";
			cnt = 0;
		} else
			sep = ", ";

	}
	(void)fprintf(stderr, "\n");
}
#endif
47850982Sbostic 
#ifdef DEBUG
#if __STDC__
#include <stdarg.h>
#else
#include <varargs.h>
#endif

/*
 * ERR -- report a fatal consistency failure (DEBUG builds only).
 *
 * Parameters:
 *	fmt:	printf(3)-style format, followed by its arguments
 *
 * The formatted message and a newline are written to stderr, after which
 * the process is aborted; this routine does not return.
 */
static void
#if __STDC__
err(const char *fmt, ...)
#else
err(fmt, va_alist)
	char *fmt;
	va_dcl
#endif
{
	va_list args;

#if __STDC__
	va_start(args, fmt);
#else
	va_start(args);
#endif
	(void)vfprintf(stderr, fmt, args);
	va_end(args);
	(void)fputc('\n', stderr);
	abort();
	/* NOTREACHED */
}
#endif
508