xref: /netbsd-src/lib/libc/db/mpool/mpool.c (revision 23c8222edbfb0f0932d88a8351d3a0cf817dfb9e)
1 /*	$NetBSD: mpool.c,v 1.14 2003/08/07 16:42:44 agc Exp $	*/
2 
3 /*-
4  * Copyright (c) 1990, 1993, 1994
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 #if defined(LIBC_SCCS) && !defined(lint)
34 #if 0
35 static char sccsid[] = "@(#)mpool.c	8.5 (Berkeley) 7/26/94";
36 #else
37 __RCSID("$NetBSD: mpool.c,v 1.14 2003/08/07 16:42:44 agc Exp $");
38 #endif
39 #endif /* LIBC_SCCS and not lint */
40 
41 #include "namespace.h"
42 #include <sys/queue.h>
43 #include <sys/stat.h>
44 
45 #include <errno.h>
46 #include <stdio.h>
47 #include <stdlib.h>
48 #include <string.h>
49 #include <unistd.h>
50 
51 #include <db.h>
52 
53 #define	__MPOOLINTERFACE_PRIVATE
54 #include <mpool.h>
55 
56 #ifdef __weak_alias
57 __weak_alias(mpool_close,_mpool_close)
58 __weak_alias(mpool_filter,_mpool_filter)
59 __weak_alias(mpool_get,_mpool_get)
60 __weak_alias(mpool_new,_mpool_new)
61 __weak_alias(mpool_open,_mpool_open)
62 __weak_alias(mpool_put,_mpool_put)
63 __weak_alias(mpool_sync,_mpool_sync)
64 #endif
65 
66 static BKT *mpool_bkt __P((MPOOL *));
67 static BKT *mpool_look __P((MPOOL *, pgno_t));
68 static int  mpool_write __P((MPOOL *, BKT *));
69 
70 /*
71  * mpool_open --
72  *	Initialize a memory pool.
73  */
74 /*ARGSUSED*/
75 MPOOL *
76 mpool_open(key, fd, pagesize, maxcache)
77 	void *key;
78 	int fd;
79 	pgno_t pagesize, maxcache;
80 {
81 	struct stat sb;
82 	MPOOL *mp;
83 	int entry;
84 
85 	/*
86 	 * Get information about the file.
87 	 *
88 	 * XXX
89 	 * We don't currently handle pipes, although we should.
90 	 */
91 	if (fstat(fd, &sb))
92 		return (NULL);
93 	if (!S_ISREG(sb.st_mode)) {
94 		errno = ESPIPE;
95 		return (NULL);
96 	}
97 
98 	/* Allocate and initialize the MPOOL cookie. */
99 	if ((mp = (MPOOL *)calloc(1, sizeof(MPOOL))) == NULL)
100 		return (NULL);
101 	CIRCLEQ_INIT(&mp->lqh);
102 	for (entry = 0; entry < HASHSIZE; ++entry)
103 		CIRCLEQ_INIT(&mp->hqh[entry]);
104 	mp->maxcache = maxcache;
105 	mp->npages = (pgno_t)(sb.st_size / pagesize);
106 	mp->pagesize = pagesize;
107 	mp->fd = fd;
108 	return (mp);
109 }
110 
111 /*
112  * mpool_filter --
113  *	Initialize input/output filters.
114  */
115 void
116 mpool_filter(mp, pgin, pgout, pgcookie)
117 	MPOOL *mp;
118 	void (*pgin) __P((void *, pgno_t, void *));
119 	void (*pgout) __P((void *, pgno_t, void *));
120 	void *pgcookie;
121 {
122 	mp->pgin = pgin;
123 	mp->pgout = pgout;
124 	mp->pgcookie = pgcookie;
125 }
126 
127 /*
128  * mpool_new --
129  *	Get a new page of memory.
130  */
131 void *
132 mpool_new(mp, pgnoaddr)
133 	MPOOL *mp;
134 	pgno_t *pgnoaddr;
135 {
136 	struct _hqh *head;
137 	BKT *bp;
138 
139 	if (mp->npages == MAX_PAGE_NUMBER) {
140 		(void)fprintf(stderr, "mpool_new: page allocation overflow.\n");
141 		abort();
142 	}
143 #ifdef STATISTICS
144 	++mp->pagenew;
145 #endif
146 	/*
147 	 * Get a BKT from the cache.  Assign a new page number, attach
148 	 * it to the head of the hash chain, the tail of the lru chain,
149 	 * and return.
150 	 */
151 	if ((bp = mpool_bkt(mp)) == NULL)
152 		return (NULL);
153 	*pgnoaddr = bp->pgno = mp->npages++;
154 	bp->flags = MPOOL_PINNED;
155 
156 	head = &mp->hqh[HASHKEY(bp->pgno)];
157 	CIRCLEQ_INSERT_HEAD(head, bp, hq);
158 	CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q);
159 	return (bp->page);
160 }
161 
162 /*
163  * mpool_get
164  *	Get a page.
165  */
166 /*ARGSUSED*/
167 void *
168 mpool_get(mp, pgno, flags)
169 	MPOOL *mp;
170 	pgno_t pgno;
171 	u_int flags;				/* XXX not used? */
172 {
173 	struct _hqh *head;
174 	BKT *bp;
175 	off_t off;
176 	int nr;
177 
178 	/* Check for attempt to retrieve a non-existent page. */
179 	if (pgno >= mp->npages) {
180 		errno = EINVAL;
181 		return (NULL);
182 	}
183 
184 #ifdef STATISTICS
185 	++mp->pageget;
186 #endif
187 
188 	/* Check for a page that is cached. */
189 	if ((bp = mpool_look(mp, pgno)) != NULL) {
190 #ifdef DEBUG
191 		if (bp->flags & MPOOL_PINNED) {
192 			(void)fprintf(stderr,
193 			    "mpool_get: page %d already pinned\n", bp->pgno);
194 			abort();
195 		}
196 #endif
197 		/*
198 		 * Move the page to the head of the hash chain and the tail
199 		 * of the lru chain.
200 		 */
201 		head = &mp->hqh[HASHKEY(bp->pgno)];
202 		CIRCLEQ_REMOVE(head, bp, hq);
203 		CIRCLEQ_INSERT_HEAD(head, bp, hq);
204 		CIRCLEQ_REMOVE(&mp->lqh, bp, q);
205 		CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q);
206 
207 		/* Return a pinned page. */
208 		bp->flags |= MPOOL_PINNED;
209 		return (bp->page);
210 	}
211 
212 	/* Get a page from the cache. */
213 	if ((bp = mpool_bkt(mp)) == NULL)
214 		return (NULL);
215 
216 	/* Read in the contents. */
217 #ifdef STATISTICS
218 	++mp->pageread;
219 #endif
220 	off = mp->pagesize * pgno;
221 	if ((nr = pread(mp->fd, bp->page, (size_t)mp->pagesize, off)) != (int)mp->pagesize) {
222 		if (nr >= 0)
223 			errno = EFTYPE;
224 		return (NULL);
225 	}
226 
227 	/* Set the page number, pin the page. */
228 	bp->pgno = pgno;
229 	bp->flags = MPOOL_PINNED;
230 
231 	/*
232 	 * Add the page to the head of the hash chain and the tail
233 	 * of the lru chain.
234 	 */
235 	head = &mp->hqh[HASHKEY(bp->pgno)];
236 	CIRCLEQ_INSERT_HEAD(head, bp, hq);
237 	CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q);
238 
239 	/* Run through the user's filter. */
240 	if (mp->pgin != NULL)
241 		(mp->pgin)(mp->pgcookie, bp->pgno, bp->page);
242 
243 	return (bp->page);
244 }
245 
246 /*
247  * mpool_put
248  *	Return a page.
249  */
250 /*ARGSUSED*/
251 int
252 mpool_put(mp, page, flags)
253 	MPOOL *mp;
254 	void *page;
255 	u_int flags;
256 {
257 	BKT *bp;
258 
259 #ifdef STATISTICS
260 	++mp->pageput;
261 #endif
262 	bp = (BKT *)(void *)((char *)page - sizeof(BKT));
263 #ifdef DEBUG
264 	if (!(bp->flags & MPOOL_PINNED)) {
265 		(void)fprintf(stderr,
266 		    "mpool_put: page %d not pinned\n", bp->pgno);
267 		abort();
268 	}
269 #endif
270 	bp->flags &= ~MPOOL_PINNED;
271 	bp->flags |= flags & MPOOL_DIRTY;
272 	return (RET_SUCCESS);
273 }
274 
275 /*
276  * mpool_close
277  *	Close the buffer pool.
278  */
279 int
280 mpool_close(mp)
281 	MPOOL *mp;
282 {
283 	BKT *bp;
284 
285 	/* Free up any space allocated to the lru pages. */
286 	while ((bp = mp->lqh.cqh_first) != (void *)&mp->lqh) {
287 		CIRCLEQ_REMOVE(&mp->lqh, mp->lqh.cqh_first, q);
288 		free(bp);
289 	}
290 
291 	/* Free the MPOOL cookie. */
292 	free(mp);
293 	return (RET_SUCCESS);
294 }
295 
296 /*
297  * mpool_sync
298  *	Sync the pool to disk.
299  */
300 int
301 mpool_sync(mp)
302 	MPOOL *mp;
303 {
304 	BKT *bp;
305 
306 	/* Walk the lru chain, flushing any dirty pages to disk. */
307 	for (bp = mp->lqh.cqh_first;
308 	    bp != (void *)&mp->lqh; bp = bp->q.cqe_next)
309 		if (bp->flags & MPOOL_DIRTY &&
310 		    mpool_write(mp, bp) == RET_ERROR)
311 			return (RET_ERROR);
312 
313 	/* Sync the file descriptor. */
314 	return (fsync(mp->fd) ? RET_ERROR : RET_SUCCESS);
315 }
316 
317 /*
318  * mpool_bkt
319  *	Get a page from the cache (or create one).
320  */
321 static BKT *
322 mpool_bkt(mp)
323 	MPOOL *mp;
324 {
325 	struct _hqh *head;
326 	BKT *bp;
327 
328 	/* If under the max cached, always create a new page. */
329 	if (mp->curcache < mp->maxcache)
330 		goto new;
331 
332 	/*
333 	 * If the cache is max'd out, walk the lru list for a buffer we
334 	 * can flush.  If we find one, write it (if necessary) and take it
335 	 * off any lists.  If we don't find anything we grow the cache anyway.
336 	 * The cache never shrinks.
337 	 */
338 	for (bp = mp->lqh.cqh_first;
339 	    bp != (void *)&mp->lqh; bp = bp->q.cqe_next)
340 		if (!(bp->flags & MPOOL_PINNED)) {
341 			/* Flush if dirty. */
342 			if (bp->flags & MPOOL_DIRTY &&
343 			    mpool_write(mp, bp) == RET_ERROR)
344 				return (NULL);
345 #ifdef STATISTICS
346 			++mp->pageflush;
347 #endif
348 			/* Remove from the hash and lru queues. */
349 			head = &mp->hqh[HASHKEY(bp->pgno)];
350 			CIRCLEQ_REMOVE(head, bp, hq);
351 			CIRCLEQ_REMOVE(&mp->lqh, bp, q);
352 #ifdef DEBUG
353 			{ void *spage;
354 				spage = bp->page;
355 				memset(bp, 0xff, sizeof(BKT) + mp->pagesize);
356 				bp->page = spage;
357 			}
358 #endif
359 			return (bp);
360 		}
361 
362 new:	if ((bp = (BKT *)malloc((size_t)(sizeof(BKT) + mp->pagesize))) == NULL)
363 		return (NULL);
364 #ifdef STATISTICS
365 	++mp->pagealloc;
366 #endif
367 #if defined(DEBUG) || defined(PURIFY)
368 	memset(bp, 0xff, sizeof(BKT) + mp->pagesize);
369 #endif
370 	bp->page = (char *)(void *)bp + sizeof(BKT);
371 	++mp->curcache;
372 	return (bp);
373 }
374 
375 /*
376  * mpool_write
377  *	Write a page to disk.
378  */
379 static int
380 mpool_write(mp, bp)
381 	MPOOL *mp;
382 	BKT *bp;
383 {
384 	off_t off;
385 
386 #ifdef STATISTICS
387 	++mp->pagewrite;
388 #endif
389 
390 	/* Run through the user's filter. */
391 	if (mp->pgout)
392 		(mp->pgout)(mp->pgcookie, bp->pgno, bp->page);
393 
394 	off = mp->pagesize * bp->pgno;
395 	if (pwrite(mp->fd, bp->page, (size_t)mp->pagesize, off) != (int)mp->pagesize)
396 		return (RET_ERROR);
397 
398 	/*
399 	 * Re-run through the input filter since this page may soon be
400 	 * accessed via the cache, and whatever the user's output filter
401 	 * did may screw things up if we don't let the input filter
402 	 * restore the in-core copy.
403 	 */
404 	if (mp->pgin)
405 		(mp->pgin)(mp->pgcookie, bp->pgno, bp->page);
406 
407 	bp->flags &= ~MPOOL_DIRTY;
408 	return (RET_SUCCESS);
409 }
410 
411 /*
412  * mpool_look
413  *	Lookup a page in the cache.
414  */
415 static BKT *
416 mpool_look(mp, pgno)
417 	MPOOL *mp;
418 	pgno_t pgno;
419 {
420 	struct _hqh *head;
421 	BKT *bp;
422 
423 	head = &mp->hqh[HASHKEY(pgno)];
424 	for (bp = head->cqh_first; bp != (void *)head; bp = bp->hq.cqe_next)
425 		if (bp->pgno == pgno) {
426 #ifdef STATISTICS
427 			++mp->cachehit;
428 #endif
429 			return (bp);
430 		}
431 #ifdef STATISTICS
432 	++mp->cachemiss;
433 #endif
434 	return (NULL);
435 }
436 
#ifdef STATISTICS
/*
 * mpool_stat
 *	Print out cache statistics.
 *
 *	Writes the pool's size figures and event counters to stderr,
 *	followed by the page numbers on the lru chain in chain order, ten
 *	to a line.  A page number is followed by "d" if the page is dirty
 *	and by "P" if it is pinned.
 *
 *	Every counter is cast to unsigned long before being printed so the
 *	arguments always match the %lu conversions, whatever the widths of
 *	the underlying types are.
 */
void
mpool_stat(mp)
	MPOOL *mp;
{
	BKT *bp;
	int cnt;
	const char *sep;

	(void)fprintf(stderr, "%lu pages in the file\n",
	    (unsigned long)mp->npages);
	(void)fprintf(stderr,
	    "page size %lu, cacheing %lu pages of %lu page max cache\n",
	    (unsigned long)mp->pagesize, (unsigned long)mp->curcache,
	    (unsigned long)mp->maxcache);
	(void)fprintf(stderr, "%lu page puts, %lu page gets, %lu page new\n",
	    (unsigned long)mp->pageput, (unsigned long)mp->pageget,
	    (unsigned long)mp->pagenew);
	(void)fprintf(stderr, "%lu page allocs, %lu page flushes\n",
	    (unsigned long)mp->pagealloc, (unsigned long)mp->pageflush);
	/* The hit rate is only meaningful after at least one lookup. */
	if (mp->cachehit + mp->cachemiss)
		(void)fprintf(stderr,
		    "%.0f%% cache hit rate (%lu hits, %lu misses)\n",
		    ((double)mp->cachehit / (mp->cachehit + mp->cachemiss))
		    * 100, (unsigned long)mp->cachehit,
		    (unsigned long)mp->cachemiss);
	(void)fprintf(stderr, "%lu page reads, %lu page writes\n",
	    (unsigned long)mp->pageread, (unsigned long)mp->pagewrite);

	sep = "";
	cnt = 0;
	for (bp = mp->lqh.cqh_first;
	    bp != (void *)&mp->lqh; bp = bp->q.cqe_next) {
		/* Page numbers are unsigned; print them as such. */
		(void)fprintf(stderr, "%s%lu", sep, (unsigned long)bp->pgno);
		if (bp->flags & MPOOL_DIRTY)
			(void)fprintf(stderr, "d");
		if (bp->flags & MPOOL_PINNED)
			(void)fprintf(stderr, "P");
		/* Start a new output line after every tenth entry. */
		if (++cnt == 10) {
			sep = "\n";
			cnt = 0;
		} else
			sep = ", ";

	}
	(void)fprintf(stderr, "\n");
}
#endif
485