xref: /minix3/lib/libc/db/mpool/mpool.c (revision 0a6a1f1d05b60e214de2f05a7310ddd1f0e590e7)
1 /*	$NetBSD: mpool.c,v 1.21 2013/12/14 18:04:00 christos Exp $	*/
2 
3 /*-
4  * Copyright (c) 1990, 1993, 1994
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 #if HAVE_NBTOOL_CONFIG_H
33 #include "nbtool_config.h"
34 #endif
35 
36 #include <sys/cdefs.h>
37 __RCSID("$NetBSD: mpool.c,v 1.21 2013/12/14 18:04:00 christos Exp $");
38 
39 #include "namespace.h"
40 #include <sys/queue.h>
41 #include <sys/stat.h>
42 
43 #include <errno.h>
44 #include <stdio.h>
45 #include <stdlib.h>
46 #include <string.h>
47 #include <unistd.h>
48 
49 #include <db.h>
50 
51 #define	__MPOOLINTERFACE_PRIVATE
52 #include <mpool.h>
53 
54 #ifdef __weak_alias
55 __weak_alias(mpool_close,_mpool_close)
56 __weak_alias(mpool_filter,_mpool_filter)
57 __weak_alias(mpool_get,_mpool_get)
58 __weak_alias(mpool_new,_mpool_new)
59 __weak_alias(mpool_open,_mpool_open)
60 __weak_alias(mpool_put,_mpool_put)
61 __weak_alias(mpool_sync,_mpool_sync)
62 #endif
63 
64 static BKT *mpool_bkt(MPOOL *);
65 static BKT *mpool_look(MPOOL *, pgno_t);
66 static int  mpool_write(MPOOL *, BKT *);
67 
68 /*
69  * mpool_open --
70  *	Initialize a memory pool.
71  */
72 /*ARGSUSED*/
73 MPOOL *
mpool_open(void * key,int fd,pgno_t pagesize,pgno_t maxcache)74 mpool_open(void *key, int fd, pgno_t pagesize, pgno_t maxcache)
75 {
76 	struct stat sb;
77 	MPOOL *mp;
78 	int entry;
79 
80 	/*
81 	 * Get information about the file.
82 	 *
83 	 * XXX
84 	 * We don't currently handle pipes, although we should.
85 	 */
86 	if (fstat(fd, &sb))
87 		return NULL;
88 	if (!S_ISREG(sb.st_mode)) {
89 		errno = ESPIPE;
90 		return NULL;
91 	}
92 
93 	/* Allocate and initialize the MPOOL cookie. */
94 	if ((mp = calloc(1, sizeof(*mp))) == NULL)
95 		return (NULL);
96 	TAILQ_INIT(&mp->lqh);
97 	for (entry = 0; entry < HASHSIZE; ++entry)
98 		TAILQ_INIT(&mp->hqh[entry]);
99 	mp->maxcache = maxcache;
100 	mp->npages = (pgno_t)(sb.st_size / pagesize);
101 	mp->pagesize = pagesize;
102 	mp->fd = fd;
103 	return mp;
104 }
105 
106 /*
107  * mpool_filter --
108  *	Initialize input/output filters.
109  */
110 void
mpool_filter(MPOOL * mp,void (* pgin)(void *,pgno_t,void *),void (* pgout)(void *,pgno_t,void *),void * pgcookie)111 mpool_filter(MPOOL *mp, void (*pgin)(void *, pgno_t, void *),
112     void (*pgout)(void *, pgno_t, void *), void *pgcookie)
113 {
114 	mp->pgin = pgin;
115 	mp->pgout = pgout;
116 	mp->pgcookie = pgcookie;
117 }
118 
119 /*
120  * mpool_new --
121  *	Get a new page of memory.
122  */
123 void *
mpool_new(MPOOL * mp,pgno_t * pgnoaddr)124 mpool_new( MPOOL *mp, pgno_t *pgnoaddr)
125 {
126 	struct _hqh *head;
127 	BKT *bp;
128 
129 	if (mp->npages == MAX_PAGE_NUMBER) {
130 		(void)fprintf(stderr, "mpool_new: page allocation overflow.\n");
131 		abort();
132 	}
133 #ifdef STATISTICS
134 	++mp->pagenew;
135 #endif
136 	/*
137 	 * Get a BKT from the cache.  Assign a new page number, attach
138 	 * it to the head of the hash chain, the tail of the lru chain,
139 	 * and return.
140 	 */
141 	if ((bp = mpool_bkt(mp)) == NULL)
142 		return NULL;
143 	*pgnoaddr = bp->pgno = mp->npages++;
144 	bp->flags = MPOOL_PINNED;
145 
146 	head = &mp->hqh[HASHKEY(bp->pgno)];
147 	TAILQ_INSERT_HEAD(head, bp, hq);
148 	TAILQ_INSERT_TAIL(&mp->lqh, bp, q);
149 	return bp->page;
150 }
151 
152 /*
153  * mpool_get
154  *	Get a page.
155  */
156 /*ARGSUSED*/
157 void *
mpool_get(MPOOL * mp,pgno_t pgno,u_int flags)158 mpool_get(MPOOL *mp, pgno_t pgno, u_int flags)
159 {
160 	struct _hqh *head;
161 	BKT *bp;
162 	off_t off;
163 	ssize_t nr;
164 
165 	/* Check for attempt to retrieve a non-existent page. */
166 	if (pgno >= mp->npages) {
167 		errno = EINVAL;
168 		return NULL;
169 	}
170 
171 #ifdef STATISTICS
172 	++mp->pageget;
173 #endif
174 
175 	/* Check for a page that is cached. */
176 	if ((bp = mpool_look(mp, pgno)) != NULL) {
177 #ifdef DEBUG
178 		if (bp->flags & MPOOL_PINNED) {
179 			(void)fprintf(stderr,
180 			    "mpool_get: page %d already pinned\n", bp->pgno);
181 			abort();
182 		}
183 #endif
184 		/*
185 		 * Move the page to the head of the hash chain and the tail
186 		 * of the lru chain.
187 		 */
188 		head = &mp->hqh[HASHKEY(bp->pgno)];
189 		TAILQ_REMOVE(head, bp, hq);
190 		TAILQ_INSERT_HEAD(head, bp, hq);
191 		TAILQ_REMOVE(&mp->lqh, bp, q);
192 		TAILQ_INSERT_TAIL(&mp->lqh, bp, q);
193 
194 		/* Return a pinned page. */
195 		bp->flags |= MPOOL_PINNED;
196 		return bp->page;
197 	}
198 
199 	/* Get a page from the cache. */
200 	if ((bp = mpool_bkt(mp)) == NULL)
201 		return NULL;
202 
203 	/* Read in the contents. */
204 #ifdef STATISTICS
205 	++mp->pageread;
206 #endif
207 	off = mp->pagesize * pgno;
208 	if ((nr = pread(mp->fd, bp->page, (size_t)mp->pagesize, off)) != (int)mp->pagesize) {
209 		if (nr >= 0)
210 			errno = EFTYPE;
211 		return NULL;
212 	}
213 
214 	/* Set the page number, pin the page. */
215 	bp->pgno = pgno;
216 	bp->flags = MPOOL_PINNED;
217 
218 	/*
219 	 * Add the page to the head of the hash chain and the tail
220 	 * of the lru chain.
221 	 */
222 	head = &mp->hqh[HASHKEY(bp->pgno)];
223 	TAILQ_INSERT_HEAD(head, bp, hq);
224 	TAILQ_INSERT_TAIL(&mp->lqh, bp, q);
225 
226 	/* Run through the user's filter. */
227 	if (mp->pgin != NULL)
228 		(mp->pgin)(mp->pgcookie, bp->pgno, bp->page);
229 
230 	return bp->page;
231 }
232 
233 /*
234  * mpool_put
235  *	Return a page.
236  */
237 /*ARGSUSED*/
238 int
mpool_put(MPOOL * mp,void * page,u_int flags)239 mpool_put(MPOOL *mp, void *page, u_int flags)
240 {
241 	BKT *bp;
242 
243 #ifdef STATISTICS
244 	++mp->pageput;
245 #endif
246 	bp = (void *)((intptr_t)page - sizeof(BKT));
247 #ifdef DEBUG
248 	if (!(bp->flags & MPOOL_PINNED)) {
249 		(void)fprintf(stderr,
250 		    "mpool_put: page %d not pinned\n", bp->pgno);
251 		abort();
252 	}
253 #endif
254 	bp->flags &= ~MPOOL_PINNED;
255 	bp->flags |= flags & MPOOL_DIRTY;
256 	return (RET_SUCCESS);
257 }
258 
259 /*
260  * mpool_close
261  *	Close the buffer pool.
262  */
263 int
mpool_close(MPOOL * mp)264 mpool_close(MPOOL *mp)
265 {
266 	BKT *bp;
267 
268 	/* Free up any space allocated to the lru pages. */
269 	while (!TAILQ_EMPTY(&mp->lqh)) {
270 		bp = TAILQ_FIRST(&mp->lqh);
271 		TAILQ_REMOVE(&mp->lqh, bp, q);
272 		free(bp);
273 	}
274 
275 	/* Free the MPOOL cookie. */
276 	free(mp);
277 	return RET_SUCCESS;
278 }
279 
280 /*
281  * mpool_sync
282  *	Sync the pool to disk.
283  */
284 int
mpool_sync(MPOOL * mp)285 mpool_sync(MPOOL *mp)
286 {
287 	BKT *bp;
288 
289 	/* Walk the lru chain, flushing any dirty pages to disk. */
290 	TAILQ_FOREACH(bp, &mp->lqh, q)
291 		if (bp->flags & MPOOL_DIRTY &&
292 		    mpool_write(mp, bp) == RET_ERROR)
293 			return RET_ERROR;
294 
295 	/* Sync the file descriptor. */
296 	return fsync(mp->fd) ? RET_ERROR : RET_SUCCESS;
297 }
298 
299 /*
300  * mpool_bkt
301  *	Get a page from the cache (or create one).
302  */
303 static BKT *
mpool_bkt(MPOOL * mp)304 mpool_bkt(MPOOL *mp)
305 {
306 	struct _hqh *head;
307 	BKT *bp;
308 
309 	/* If under the max cached, always create a new page. */
310 	if (mp->curcache < mp->maxcache)
311 		goto new;
312 
313 	/*
314 	 * If the cache is max'd out, walk the lru list for a buffer we
315 	 * can flush.  If we find one, write it (if necessary) and take it
316 	 * off any lists.  If we don't find anything we grow the cache anyway.
317 	 * The cache never shrinks.
318 	 */
319 	TAILQ_FOREACH(bp, &mp->lqh, q)
320 		if (!(bp->flags & MPOOL_PINNED)) {
321 			/* Flush if dirty. */
322 			if (bp->flags & MPOOL_DIRTY &&
323 			    mpool_write(mp, bp) == RET_ERROR)
324 				return NULL;
325 #ifdef STATISTICS
326 			++mp->pageflush;
327 #endif
328 			/* Remove from the hash and lru queues. */
329 			head = &mp->hqh[HASHKEY(bp->pgno)];
330 			TAILQ_REMOVE(head, bp, hq);
331 			TAILQ_REMOVE(&mp->lqh, bp, q);
332 #ifdef DEBUG
333 			{
334 				void *spage = bp->page;
335 				(void)memset(bp, 0xff,
336 				    (size_t)(sizeof(BKT) + mp->pagesize));
337 				bp->page = spage;
338 			}
339 #endif
340 			return bp;
341 		}
342 
343 new:	if ((bp = calloc(1, (size_t)(sizeof(BKT) + mp->pagesize))) == NULL)
344 		return NULL;
345 #ifdef STATISTICS
346 	++mp->pagealloc;
347 #endif
348 #if defined(DEBUG) || defined(PURIFY)
349 	(void)memset(bp, 0xff, (size_t)(sizeof(BKT) + mp->pagesize));
350 #endif
351 	bp->page = (void *)((intptr_t)bp + sizeof(BKT));
352 	++mp->curcache;
353 	return bp;
354 }
355 
356 /*
357  * mpool_write
358  *	Write a page to disk.
359  */
360 static int
mpool_write(MPOOL * mp,BKT * bp)361 mpool_write(MPOOL *mp, BKT *bp)
362 {
363 	off_t off;
364 
365 #ifdef STATISTICS
366 	++mp->pagewrite;
367 #endif
368 
369 	/* Run through the user's filter. */
370 	if (mp->pgout)
371 		(mp->pgout)(mp->pgcookie, bp->pgno, bp->page);
372 
373 	off = mp->pagesize * bp->pgno;
374 	if (pwrite(mp->fd, bp->page, (size_t)mp->pagesize, off) !=
375 	    (ssize_t)mp->pagesize)
376 		return RET_ERROR;
377 
378 	/*
379 	 * Re-run through the input filter since this page may soon be
380 	 * accessed via the cache, and whatever the user's output filter
381 	 * did may screw things up if we don't let the input filter
382 	 * restore the in-core copy.
383 	 */
384 	if (mp->pgin)
385 		(mp->pgin)(mp->pgcookie, bp->pgno, bp->page);
386 
387 	bp->flags &= ~MPOOL_DIRTY;
388 	return RET_SUCCESS;
389 }
390 
391 /*
392  * mpool_look
393  *	Lookup a page in the cache.
394  */
395 static BKT *
mpool_look(MPOOL * mp,pgno_t pgno)396 mpool_look(MPOOL *mp, pgno_t pgno)
397 {
398 	struct _hqh *head;
399 	BKT *bp;
400 
401 	head = &mp->hqh[HASHKEY(pgno)];
402 	TAILQ_FOREACH(bp, head, hq)
403 		if (bp->pgno == pgno) {
404 #ifdef STATISTICS
405 			++mp->cachehit;
406 #endif
407 			return bp;
408 		}
409 #ifdef STATISTICS
410 	++mp->cachemiss;
411 #endif
412 	return NULL;
413 }
414 
#ifdef STATISTICS
/*
 * mpool_stat
 *	Print out cache statistics.
 *
 *	Writes the pool's counters to stderr, followed by the page number
 *	of every buffer on the lru chain, ten per line; a trailing "d"
 *	marks a dirty buffer and "P" a pinned one.
 */
void
mpool_stat(MPOOL *mp)
{
	BKT *bp;
	int cnt;
	const char *sep;

	(void)fprintf(stderr, "%lu pages in the file\n", (u_long)mp->npages);
	(void)fprintf(stderr,
	    "page size %lu, cacheing %lu pages of %lu page max cache\n",
	    (u_long)mp->pagesize, (u_long)mp->curcache, (u_long)mp->maxcache);
	(void)fprintf(stderr, "%lu page puts, %lu page gets, %lu page new\n",
	    mp->pageput, mp->pageget, mp->pagenew);
	(void)fprintf(stderr, "%lu page allocs, %lu page flushes\n",
	    mp->pagealloc, mp->pageflush);
	/* Skip the hit rate when there have been no lookups at all. */
	if (mp->cachehit + mp->cachemiss)
		(void)fprintf(stderr,
		    "%.0f%% cache hit rate (%lu hits, %lu misses)\n",
		    ((double)mp->cachehit / (mp->cachehit + mp->cachemiss))
		    * 100, mp->cachehit, mp->cachemiss);
	(void)fprintf(stderr, "%lu page reads, %lu page writes\n",
	    mp->pageread, mp->pagewrite);

	/* The separator is printed before each entry, not after it. */
	sep = "";
	cnt = 0;
	TAILQ_FOREACH(bp, &mp->lqh, q) {
		(void)fprintf(stderr, "%s%d", sep, bp->pgno);
		if (bp->flags & MPOOL_DIRTY)
			(void)fprintf(stderr, "d");
		if (bp->flags & MPOOL_PINNED)
			(void)fprintf(stderr, "P");
		/* Start a new line after every tenth entry. */
		if (++cnt == 10) {
			sep = "\n";
			cnt = 0;
		} else
			sep = ", ";
	}
	(void)fprintf(stderr, "\n");
}
#endif
462