xref: /minix3/lib/libc/db/mpool/mpool.c (revision 58a2b0008e28f606a7f7f5faaeaba4faac57a1ea)
1 /*	$NetBSD: mpool.c,v 1.19 2009/04/22 18:44:06 christos Exp $	*/
2 
3 /*-
4  * Copyright (c) 1990, 1993, 1994
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 #if HAVE_NBTOOL_CONFIG_H
33 #include "nbtool_config.h"
34 #endif
35 
36 #include <sys/cdefs.h>
37 #ifndef __minix
38 __RCSID("$NetBSD: mpool.c,v 1.19 2009/04/22 18:44:06 christos Exp $");
39 #endif
40 
41 #ifndef __minix
42 #include "namespace.h"
43 #endif
44 #include <sys/queue.h>
45 #include <sys/stat.h>
46 
47 #include <errno.h>
48 #include <stdio.h>
49 #include <stdlib.h>
50 #include <string.h>
51 #include <unistd.h>
52 
53 #include <db.h>
54 
55 #define	__MPOOLINTERFACE_PRIVATE
56 #include <mpool.h>
57 
/*
 * When the build environment defines __weak_alias, pair each public
 * mpool_* entry point with its underscore-prefixed name.
 * NOTE(review): presumably this works together with "namespace.h" (included
 * above on non-Minix builds) so the library's internal references stay out
 * of the application namespace -- confirm against that header.
 */
#ifdef __weak_alias
__weak_alias(mpool_close,_mpool_close)
__weak_alias(mpool_filter,_mpool_filter)
__weak_alias(mpool_get,_mpool_get)
__weak_alias(mpool_new,_mpool_new)
__weak_alias(mpool_open,_mpool_open)
__weak_alias(mpool_put,_mpool_put)
__weak_alias(mpool_sync,_mpool_sync)
#endif
67 
68 static BKT *mpool_bkt(MPOOL *);
69 static BKT *mpool_look(MPOOL *, pgno_t);
70 static int  mpool_write(MPOOL *, BKT *);
71 
72 /*
73  * mpool_open --
74  *	Initialize a memory pool.
75  */
76 /*ARGSUSED*/
77 MPOOL *
78 mpool_open(void *key, int fd, pgno_t pagesize, pgno_t maxcache)
79 {
80 	struct stat sb;
81 	MPOOL *mp;
82 	int entry;
83 
84 	/*
85 	 * Get information about the file.
86 	 *
87 	 * XXX
88 	 * We don't currently handle pipes, although we should.
89 	 */
90 	if (fstat(fd, &sb))
91 		return (NULL);
92 	if (!S_ISREG(sb.st_mode)) {
93 		errno = ESPIPE;
94 		return (NULL);
95 	}
96 
97 	/* Allocate and initialize the MPOOL cookie. */
98 	if ((mp = (MPOOL *)calloc(1, sizeof(MPOOL))) == NULL)
99 		return (NULL);
100 	CIRCLEQ_INIT(&mp->lqh);
101 	for (entry = 0; entry < HASHSIZE; ++entry)
102 		CIRCLEQ_INIT(&mp->hqh[entry]);
103 	mp->maxcache = maxcache;
104 	mp->npages = (pgno_t)(sb.st_size / pagesize);
105 	mp->pagesize = pagesize;
106 	mp->fd = fd;
107 	return (mp);
108 }
109 
110 /*
111  * mpool_filter --
112  *	Initialize input/output filters.
113  */
114 void
115 mpool_filter(MPOOL *mp, void (*pgin)(void *, pgno_t, void *),
116     void (*pgout)(void *, pgno_t, void *), void *pgcookie)
117 {
118 	mp->pgin = pgin;
119 	mp->pgout = pgout;
120 	mp->pgcookie = pgcookie;
121 }
122 
123 /*
124  * mpool_new --
125  *	Get a new page of memory.
126  */
127 void *
128 mpool_new( MPOOL *mp, pgno_t *pgnoaddr)
129 {
130 	struct _hqh *head;
131 	BKT *bp;
132 
133 	if (mp->npages == MAX_PAGE_NUMBER) {
134 		(void)fprintf(stderr, "mpool_new: page allocation overflow.\n");
135 		abort();
136 	}
137 #ifdef STATISTICS
138 	++mp->pagenew;
139 #endif
140 	/*
141 	 * Get a BKT from the cache.  Assign a new page number, attach
142 	 * it to the head of the hash chain, the tail of the lru chain,
143 	 * and return.
144 	 */
145 	if ((bp = mpool_bkt(mp)) == NULL)
146 		return (NULL);
147 	*pgnoaddr = bp->pgno = mp->npages++;
148 	bp->flags = MPOOL_PINNED;
149 
150 	head = &mp->hqh[HASHKEY(bp->pgno)];
151 	CIRCLEQ_INSERT_HEAD(head, bp, hq);
152 	CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q);
153 	return (bp->page);
154 }
155 
156 /*
157  * mpool_get
158  *	Get a page.
159  */
160 /*ARGSUSED*/
161 void *
162 mpool_get(MPOOL *mp, pgno_t pgno, u_int flags)
163 {
164 	struct _hqh *head;
165 	BKT *bp;
166 	off_t off;
167 	ssize_t nr;
168 
169 	/* Check for attempt to retrieve a non-existent page. */
170 	if (pgno >= mp->npages) {
171 		errno = EINVAL;
172 		return (NULL);
173 	}
174 
175 #ifdef STATISTICS
176 	++mp->pageget;
177 #endif
178 
179 	/* Check for a page that is cached. */
180 	if ((bp = mpool_look(mp, pgno)) != NULL) {
181 #ifdef DEBUG
182 		if (bp->flags & MPOOL_PINNED) {
183 			(void)fprintf(stderr,
184 			    "mpool_get: page %d already pinned\n", bp->pgno);
185 			abort();
186 		}
187 #endif
188 		/*
189 		 * Move the page to the head of the hash chain and the tail
190 		 * of the lru chain.
191 		 */
192 		head = &mp->hqh[HASHKEY(bp->pgno)];
193 		CIRCLEQ_REMOVE(head, bp, hq);
194 		CIRCLEQ_INSERT_HEAD(head, bp, hq);
195 		CIRCLEQ_REMOVE(&mp->lqh, bp, q);
196 		CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q);
197 
198 		/* Return a pinned page. */
199 		bp->flags |= MPOOL_PINNED;
200 		return (bp->page);
201 	}
202 
203 	/* Get a page from the cache. */
204 	if ((bp = mpool_bkt(mp)) == NULL)
205 		return (NULL);
206 
207 	/* Read in the contents. */
208 #ifdef STATISTICS
209 	++mp->pageread;
210 #endif
211 	off = mp->pagesize * pgno;
212 	if ((nr = pread(mp->fd, bp->page, (size_t)mp->pagesize, off)) != (int)mp->pagesize) {
213 		if (nr >= 0)
214 			errno = EFTYPE;
215 		return (NULL);
216 	}
217 
218 	/* Set the page number, pin the page. */
219 	bp->pgno = pgno;
220 	bp->flags = MPOOL_PINNED;
221 
222 	/*
223 	 * Add the page to the head of the hash chain and the tail
224 	 * of the lru chain.
225 	 */
226 	head = &mp->hqh[HASHKEY(bp->pgno)];
227 	CIRCLEQ_INSERT_HEAD(head, bp, hq);
228 	CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q);
229 
230 	/* Run through the user's filter. */
231 	if (mp->pgin != NULL)
232 		(mp->pgin)(mp->pgcookie, bp->pgno, bp->page);
233 
234 	return (bp->page);
235 }
236 
237 /*
238  * mpool_put
239  *	Return a page.
240  */
241 /*ARGSUSED*/
242 int
243 mpool_put(MPOOL *mp, void *page, u_int flags)
244 {
245 	BKT *bp;
246 
247 #ifdef STATISTICS
248 	++mp->pageput;
249 #endif
250 	bp = (BKT *)(void *)((char *)page - sizeof(BKT));
251 #ifdef DEBUG
252 	if (!(bp->flags & MPOOL_PINNED)) {
253 		(void)fprintf(stderr,
254 		    "mpool_put: page %d not pinned\n", bp->pgno);
255 		abort();
256 	}
257 #endif
258 	bp->flags &= ~MPOOL_PINNED;
259 	bp->flags |= flags & MPOOL_DIRTY;
260 	return (RET_SUCCESS);
261 }
262 
263 /*
264  * mpool_close
265  *	Close the buffer pool.
266  */
267 int
268 mpool_close(MPOOL *mp)
269 {
270 	BKT *bp;
271 
272 	/* Free up any space allocated to the lru pages. */
273 	while ((bp = mp->lqh.cqh_first) != (void *)&mp->lqh) {
274 		CIRCLEQ_REMOVE(&mp->lqh, mp->lqh.cqh_first, q);
275 		free(bp);
276 	}
277 
278 	/* Free the MPOOL cookie. */
279 	free(mp);
280 	return (RET_SUCCESS);
281 }
282 
283 /*
284  * mpool_sync
285  *	Sync the pool to disk.
286  */
287 int
288 mpool_sync(MPOOL *mp)
289 {
290 	BKT *bp;
291 
292 	/* Walk the lru chain, flushing any dirty pages to disk. */
293 	for (bp = mp->lqh.cqh_first;
294 	    bp != (void *)&mp->lqh; bp = bp->q.cqe_next)
295 		if (bp->flags & MPOOL_DIRTY &&
296 		    mpool_write(mp, bp) == RET_ERROR)
297 			return (RET_ERROR);
298 
299 	/* Sync the file descriptor. */
300 	return (fsync(mp->fd) ? RET_ERROR : RET_SUCCESS);
301 }
302 
303 /*
304  * mpool_bkt
305  *	Get a page from the cache (or create one).
306  */
307 static BKT *
308 mpool_bkt(MPOOL *mp)
309 {
310 	struct _hqh *head;
311 	BKT *bp;
312 
313 	/* If under the max cached, always create a new page. */
314 	if (mp->curcache < mp->maxcache)
315 		goto new;
316 
317 	/*
318 	 * If the cache is max'd out, walk the lru list for a buffer we
319 	 * can flush.  If we find one, write it (if necessary) and take it
320 	 * off any lists.  If we don't find anything we grow the cache anyway.
321 	 * The cache never shrinks.
322 	 */
323 	for (bp = mp->lqh.cqh_first;
324 	    bp != (void *)&mp->lqh; bp = bp->q.cqe_next)
325 		if (!(bp->flags & MPOOL_PINNED)) {
326 			/* Flush if dirty. */
327 			if (bp->flags & MPOOL_DIRTY &&
328 			    mpool_write(mp, bp) == RET_ERROR)
329 				return (NULL);
330 #ifdef STATISTICS
331 			++mp->pageflush;
332 #endif
333 			/* Remove from the hash and lru queues. */
334 			head = &mp->hqh[HASHKEY(bp->pgno)];
335 			CIRCLEQ_REMOVE(head, bp, hq);
336 			CIRCLEQ_REMOVE(&mp->lqh, bp, q);
337 #ifdef DEBUG
338 			{
339 				void *spage = bp->page;
340 				(void)memset(bp, 0xff,
341 				    (size_t)(sizeof(BKT) + mp->pagesize));
342 				bp->page = spage;
343 			}
344 #endif
345 			return (bp);
346 		}
347 
348 new:	if ((bp = calloc(1, (size_t)(sizeof(BKT) + mp->pagesize))) == NULL)
349 		return (NULL);
350 #ifdef STATISTICS
351 	++mp->pagealloc;
352 #endif
353 #if defined(DEBUG) || defined(PURIFY)
354 	(void)memset(bp, 0xff, (size_t)(sizeof(BKT) + mp->pagesize));
355 #endif
356 	bp->page = (char *)(void *)bp + sizeof(BKT);
357 	++mp->curcache;
358 	return (bp);
359 }
360 
361 /*
362  * mpool_write
363  *	Write a page to disk.
364  */
365 static int
366 mpool_write(MPOOL *mp, BKT *bp)
367 {
368 	off_t off;
369 
370 #ifdef STATISTICS
371 	++mp->pagewrite;
372 #endif
373 
374 	/* Run through the user's filter. */
375 	if (mp->pgout)
376 		(mp->pgout)(mp->pgcookie, bp->pgno, bp->page);
377 
378 	off = mp->pagesize * bp->pgno;
379 	if (pwrite(mp->fd, bp->page, (size_t)mp->pagesize, off) != (int)mp->pagesize)
380 		return (RET_ERROR);
381 
382 	/*
383 	 * Re-run through the input filter since this page may soon be
384 	 * accessed via the cache, and whatever the user's output filter
385 	 * did may screw things up if we don't let the input filter
386 	 * restore the in-core copy.
387 	 */
388 	if (mp->pgin)
389 		(mp->pgin)(mp->pgcookie, bp->pgno, bp->page);
390 
391 	bp->flags &= ~MPOOL_DIRTY;
392 	return (RET_SUCCESS);
393 }
394 
395 /*
396  * mpool_look
397  *	Lookup a page in the cache.
398  */
399 static BKT *
400 mpool_look(MPOOL *mp, pgno_t pgno)
401 {
402 	struct _hqh *head;
403 	BKT *bp;
404 
405 	head = &mp->hqh[HASHKEY(pgno)];
406 	for (bp = head->cqh_first; bp != (void *)head; bp = bp->hq.cqe_next)
407 		if (bp->pgno == pgno) {
408 #ifdef STATISTICS
409 			++mp->cachehit;
410 #endif
411 			return (bp);
412 		}
413 #ifdef STATISTICS
414 	++mp->cachemiss;
415 #endif
416 	return (NULL);
417 }
418 
#ifdef STATISTICS
/*
 * mpool_stat
 *	Print out cache statistics.
 *
 *	Writes the pool's page counts, the event counters and the hit rate to
 *	stderr, followed by the cached page numbers in lru order, ten per
 *	line.  A page number is suffixed with "d" if the page is dirty and
 *	"P" if it is pinned.
 */
void
mpool_stat(MPOOL *mp)
{
	BKT *bp;
	int cnt;
	const char *sep;

	(void)fprintf(stderr, "%lu pages in the file\n", (u_long)mp->npages);
	(void)fprintf(stderr,
	    "page size %lu, cacheing %lu pages of %lu page max cache\n",
	    (u_long)mp->pagesize, (u_long)mp->curcache, (u_long)mp->maxcache);
	(void)fprintf(stderr, "%lu page puts, %lu page gets, %lu page new\n",
	    mp->pageput, mp->pageget, mp->pagenew);
	(void)fprintf(stderr, "%lu page allocs, %lu page flushes\n",
	    mp->pagealloc, mp->pageflush);
	/* Skip the hit rate when there have been no lookups at all. */
	if (mp->cachehit + mp->cachemiss)
		(void)fprintf(stderr,
		    "%.0f%% cache hit rate (%lu hits, %lu misses)\n",
		    ((double)mp->cachehit / (mp->cachehit + mp->cachemiss))
		    * 100, mp->cachehit, mp->cachemiss);
	(void)fprintf(stderr, "%lu page reads, %lu page writes\n",
	    mp->pageread, mp->pagewrite);

	sep = "";
	cnt = 0;
	for (bp = mp->lqh.cqh_first;
	    bp != (void *)&mp->lqh; bp = bp->q.cqe_next) {
		/* Page numbers are printed the same way as mp->npages above. */
		(void)fprintf(stderr, "%s%lu", sep, (u_long)bp->pgno);
		if (bp->flags & MPOOL_DIRTY)
			(void)fprintf(stderr, "d");
		if (bp->flags & MPOOL_PINNED)
			(void)fprintf(stderr, "P");
		/* Break the listing after every tenth entry. */
		if (++cnt == 10) {
			sep = "\n";
			cnt = 0;
		} else
			sep = ", ";

	}
	(void)fprintf(stderr, "\n");
}
#endif
467