xref: /dflybsd-src/lib/libc/db/mpool/mpool.c (revision c9fbf0d3b1d54097180190816f8fa4f5d415b174)
1 /*-
2  * Copyright (c) 1990, 1993, 1994
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. Neither the name of the University nor the names of its contributors
14  *    may be used to endorse or promote products derived from this software
15  *    without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  * $FreeBSD: src/lib/libc/db/mpool/mpool.c,v 1.5.2.1 2001/03/05 23:05:01 obrien Exp $
30  * $DragonFly: src/lib/libc/db/mpool/mpool.c,v 1.6 2005/11/12 23:01:55 swildner Exp $
31  *
32  * @(#)mpool.c	8.5 (Berkeley) 7/26/94
33  */
34 
35 #include "namespace.h"
36 #include <sys/param.h>
37 #include <sys/queue.h>
38 #include <sys/stat.h>
39 
40 #include <errno.h>
41 #include <stdio.h>
42 #include <stdlib.h>
43 #include <string.h>
44 #include <unistd.h>
45 #include "un-namespace.h"
46 
47 #include <db.h>
48 
49 #define	__MPOOLINTERFACE_PRIVATE
50 #include <mpool.h>
51 
52 static BKT *mpool_bkt (MPOOL *);
53 static BKT *mpool_look (MPOOL *, pgno_t);
54 static int  mpool_write (MPOOL *, BKT *);
55 
56 /*
57  * mpool_open --
58  *	Initialize a memory pool.
59  */
60 MPOOL *
61 mpool_open(void *key, int fd, pgno_t pagesize, pgno_t maxcache)
62 {
63 	struct stat sb;
64 	MPOOL *mp;
65 	int entry;
66 
67 	/*
68 	 * Get information about the file.
69 	 *
70 	 * XXX
71 	 * We don't currently handle pipes, although we should.
72 	 */
73 	if (_fstat(fd, &sb))
74 		return (NULL);
75 	if (!S_ISREG(sb.st_mode)) {
76 		errno = ESPIPE;
77 		return (NULL);
78 	}
79 
80 	/* Allocate and initialize the MPOOL cookie. */
81 	if ((mp = (MPOOL *)calloc(1, sizeof(MPOOL))) == NULL)
82 		return (NULL);
83 	TAILQ_INIT(&mp->lqh);
84 	for (entry = 0; entry < HASHSIZE; ++entry)
85 		TAILQ_INIT(&mp->hqh[entry]);
86 	mp->maxcache = maxcache;
87 	mp->npages = sb.st_size / pagesize;
88 	mp->pagesize = pagesize;
89 	mp->fd = fd;
90 	return (mp);
91 }
92 
93 /*
94  * mpool_filter --
95  *	Initialize input/output filters.
96  */
97 void
98 mpool_filter(MPOOL *mp, void (*pgin)(void *, pgno_t, void *),
99 	     void (*pgout)(void *, pgno_t, void *), void *pgcookie)
100 {
101 	mp->pgin = pgin;
102 	mp->pgout = pgout;
103 	mp->pgcookie = pgcookie;
104 }
105 
106 /*
107  * mpool_new --
108  *	Get a new page of memory.
109  */
110 void *
111 mpool_new(MPOOL *mp, pgno_t *pgnoaddr)
112 {
113 	struct _hqh *head;
114 	BKT *bp;
115 
116 	if (mp->npages == MAX_PAGE_NUMBER) {
117 		fprintf(stderr, "mpool_new: page allocation overflow.\n");
118 		abort();
119 	}
120 #ifdef STATISTICS
121 	++mp->pagenew;
122 #endif
123 	/*
124 	 * Get a BKT from the cache.  Assign a new page number, attach
125 	 * it to the head of the hash chain, the tail of the lru chain,
126 	 * and return.
127 	 */
128 	if ((bp = mpool_bkt(mp)) == NULL)
129 		return (NULL);
130 	*pgnoaddr = bp->pgno = mp->npages++;
131 	bp->flags = MPOOL_PINNED;
132 
133 	head = &mp->hqh[HASHKEY(bp->pgno)];
134 	TAILQ_INSERT_HEAD(head, bp, hq);
135 	TAILQ_INSERT_TAIL(&mp->lqh, bp, q);
136 	return (bp->page);
137 }
138 
139 /*
140  * mpool_get
141  *	Get a page.
142  */
143 void *
144 mpool_get(MPOOL *mp,
145 	  pgno_t pgno,
146 	  u_int flags)				/* XXX not used? */
147 {
148 	struct _hqh *head;
149 	BKT *bp;
150 	off_t off;
151 	int nr;
152 
153 	/* Check for attempt to retrieve a non-existent page. */
154 	if (pgno >= mp->npages) {
155 		errno = EINVAL;
156 		return (NULL);
157 	}
158 
159 #ifdef STATISTICS
160 	++mp->pageget;
161 #endif
162 
163 	/* Check for a page that is cached. */
164 	if ((bp = mpool_look(mp, pgno)) != NULL) {
165 #ifdef DEBUG
166 		if (bp->flags & MPOOL_PINNED) {
167 			fprintf(stderr,
168 			    "mpool_get: page %d already pinned\n", bp->pgno);
169 			abort();
170 		}
171 #endif
172 		/*
173 		 * Move the page to the head of the hash chain and the tail
174 		 * of the lru chain.
175 		 */
176 		head = &mp->hqh[HASHKEY(bp->pgno)];
177 		TAILQ_REMOVE(head, bp, hq);
178 		TAILQ_INSERT_HEAD(head, bp, hq);
179 		TAILQ_REMOVE(&mp->lqh, bp, q);
180 		TAILQ_INSERT_TAIL(&mp->lqh, bp, q);
181 
182 		/* Return a pinned page. */
183 		bp->flags |= MPOOL_PINNED;
184 		return (bp->page);
185 	}
186 
187 	/* Get a page from the cache. */
188 	if ((bp = mpool_bkt(mp)) == NULL)
189 		return (NULL);
190 
191 	/* Read in the contents. */
192 #ifdef STATISTICS
193 	++mp->pageread;
194 #endif
195 	off = mp->pagesize * pgno;
196 	if (lseek(mp->fd, off, SEEK_SET) != off)
197 		return (NULL);
198 	if ((nr = _read(mp->fd, bp->page, mp->pagesize)) != mp->pagesize) {
199 		if (nr >= 0)
200 			errno = EFTYPE;
201 		return (NULL);
202 	}
203 
204 	/* Set the page number, pin the page. */
205 	bp->pgno = pgno;
206 	bp->flags = MPOOL_PINNED;
207 
208 	/*
209 	 * Add the page to the head of the hash chain and the tail
210 	 * of the lru chain.
211 	 */
212 	head = &mp->hqh[HASHKEY(bp->pgno)];
213 	TAILQ_INSERT_HEAD(head, bp, hq);
214 	TAILQ_INSERT_TAIL(&mp->lqh, bp, q);
215 
216 	/* Run through the user's filter. */
217 	if (mp->pgin != NULL)
218 		(mp->pgin)(mp->pgcookie, bp->pgno, bp->page);
219 
220 	return (bp->page);
221 }
222 
223 /*
224  * mpool_put
225  *	Return a page.
226  */
227 int
228 mpool_put(MPOOL *mp, void *page, u_int flags)
229 {
230 	BKT *bp;
231 
232 #ifdef STATISTICS
233 	++mp->pageput;
234 #endif
235 	bp = (BKT *)((char *)page - sizeof(BKT));
236 #ifdef DEBUG
237 	if (!(bp->flags & MPOOL_PINNED)) {
238 		fprintf(stderr,
239 		    "mpool_put: page %d not pinned\n", bp->pgno);
240 		abort();
241 	}
242 #endif
243 	bp->flags &= ~MPOOL_PINNED;
244 	bp->flags |= flags & MPOOL_DIRTY;
245 	return (RET_SUCCESS);
246 }
247 
248 /*
249  * mpool_close
250  *	Close the buffer pool.
251  */
252 int
253 mpool_close(MPOOL *mp)
254 {
255 	BKT *bp;
256 
257 	/* Free up any space allocated to the lru pages. */
258 	while (!TAILQ_EMPTY(&mp->lqh)) {
259 		bp = TAILQ_FIRST(&mp->lqh);
260 		TAILQ_REMOVE(&mp->lqh, bp, q);
261 		free(bp);
262 	}
263 
264 	/* Free the MPOOL cookie. */
265 	free(mp);
266 	return (RET_SUCCESS);
267 }
268 
269 /*
270  * mpool_sync
271  *	Sync the pool to disk.
272  */
273 int
274 mpool_sync(MPOOL *mp)
275 {
276 	BKT *bp;
277 
278 	/* Walk the lru chain, flushing any dirty pages to disk. */
279 	TAILQ_FOREACH(bp, &mp->lqh, q)
280 		if (bp->flags & MPOOL_DIRTY &&
281 		    mpool_write(mp, bp) == RET_ERROR)
282 			return (RET_ERROR);
283 
284 	/* Sync the file descriptor. */
285 	return (_fsync(mp->fd) ? RET_ERROR : RET_SUCCESS);
286 }
287 
288 /*
289  * mpool_bkt
290  *	Get a page from the cache (or create one).
291  */
292 static BKT *
293 mpool_bkt(MPOOL *mp)
294 {
295 	struct _hqh *head;
296 	BKT *bp;
297 
298 	/* If under the max cached, always create a new page. */
299 	if (mp->curcache < mp->maxcache)
300 		goto new;
301 
302 	/*
303 	 * If the cache is max'd out, walk the lru list for a buffer we
304 	 * can flush.  If we find one, write it (if necessary) and take it
305 	 * off any lists.  If we don't find anything we grow the cache anyway.
306 	 * The cache never shrinks.
307 	 */
308 	TAILQ_FOREACH(bp, &mp->lqh, q)
309 		if (!(bp->flags & MPOOL_PINNED)) {
310 			/* Flush if dirty. */
311 			if (bp->flags & MPOOL_DIRTY &&
312 			    mpool_write(mp, bp) == RET_ERROR)
313 				return (NULL);
314 #ifdef STATISTICS
315 			++mp->pageflush;
316 #endif
317 			/* Remove from the hash and lru queues. */
318 			head = &mp->hqh[HASHKEY(bp->pgno)];
319 			TAILQ_REMOVE(head, bp, hq);
320 			TAILQ_REMOVE(&mp->lqh, bp, q);
321 #ifdef DEBUG
322 			{ void *spage;
323 				spage = bp->page;
324 				memset(bp, 0xff, sizeof(BKT) + mp->pagesize);
325 				bp->page = spage;
326 			}
327 #endif
328 			return (bp);
329 		}
330 
331 new:	if ((bp = (BKT *)malloc(sizeof(BKT) + mp->pagesize)) == NULL)
332 		return (NULL);
333 #ifdef STATISTICS
334 	++mp->pagealloc;
335 #endif
336 #if defined(DEBUG) || defined(PURIFY)
337 	memset(bp, 0xff, sizeof(BKT) + mp->pagesize);
338 #endif
339 	bp->page = (char *)bp + sizeof(BKT);
340 	++mp->curcache;
341 	return (bp);
342 }
343 
344 /*
345  * mpool_write
346  *	Write a page to disk.
347  */
348 static int
349 mpool_write(MPOOL *mp, BKT *bp)
350 {
351 	off_t off;
352 
353 #ifdef STATISTICS
354 	++mp->pagewrite;
355 #endif
356 
357 	/* Run through the user's filter. */
358 	if (mp->pgout)
359 		(mp->pgout)(mp->pgcookie, bp->pgno, bp->page);
360 
361 	off = mp->pagesize * bp->pgno;
362 	if (lseek(mp->fd, off, SEEK_SET) != off)
363 		return (RET_ERROR);
364 	if (_write(mp->fd, bp->page, mp->pagesize) != mp->pagesize)
365 		return (RET_ERROR);
366 
367 	bp->flags &= ~MPOOL_DIRTY;
368 	return (RET_SUCCESS);
369 }
370 
371 /*
372  * mpool_look
373  *	Lookup a page in the cache.
374  */
375 static BKT *
376 mpool_look(MPOOL *mp, pgno_t pgno)
377 {
378 	struct _hqh *head;
379 	BKT *bp;
380 
381 	head = &mp->hqh[HASHKEY(pgno)];
382 	TAILQ_FOREACH(bp, head, hq)
383 		if (bp->pgno == pgno) {
384 #ifdef STATISTICS
385 			++mp->cachehit;
386 #endif
387 			return (bp);
388 		}
389 #ifdef STATISTICS
390 	++mp->cachemiss;
391 #endif
392 	return (NULL);
393 }
394 
#ifdef STATISTICS
/*
 * mpool_stat
 *	Print out cache statistics.
 *
 *	Writes the pool's counters to stderr, followed by the page number of
 *	every cached buffer in lru order, ten per line.  A page number is
 *	suffixed with "d" when the buffer is dirty and "P" when it is pinned.
 *	Compiled only when STATISTICS is defined.
 */
void
mpool_stat(MPOOL *mp)
{
	BKT *bp;
	int cnt;
	const char *sep;

	/*
	 * Each value printed with %lu is cast explicitly, so the argument
	 * matches the conversion whatever width the field is declared with.
	 */
	fprintf(stderr, "%lu pages in the file\n", (u_long)mp->npages);
	fprintf(stderr,
	    "page size %lu, cacheing %lu pages of %lu page max cache\n",
	    (u_long)mp->pagesize, (u_long)mp->curcache, (u_long)mp->maxcache);
	fprintf(stderr, "%lu page puts, %lu page gets, %lu page new\n",
	    (u_long)mp->pageput, (u_long)mp->pageget, (u_long)mp->pagenew);
	fprintf(stderr, "%lu page allocs, %lu page flushes\n",
	    (u_long)mp->pagealloc, (u_long)mp->pageflush);
	/* Skip the hit rate entirely when there were no lookups at all. */
	if (mp->cachehit + mp->cachemiss)
		fprintf(stderr,
		    "%.0f%% cache hit rate (%lu hits, %lu misses)\n",
		    ((double)mp->cachehit / (mp->cachehit + mp->cachemiss))
		    * 100, (u_long)mp->cachehit, (u_long)mp->cachemiss);
	fprintf(stderr, "%lu page reads, %lu page writes\n",
	    (u_long)mp->pageread, (u_long)mp->pagewrite);

	sep = "";
	cnt = 0;
	TAILQ_FOREACH(bp, &mp->lqh, q) {
		fprintf(stderr, "%s%d", sep, bp->pgno);
		if (bp->flags & MPOOL_DIRTY)
			fprintf(stderr, "d");
		if (bp->flags & MPOOL_PINNED)
			fprintf(stderr, "P");
		/* Start a new output line after every tenth entry. */
		if (++cnt == 10) {
			sep = "\n";
			cnt = 0;
		} else
			sep = ", ";

	}
	fprintf(stderr, "\n");
}
#endif
441