xref: /openbsd-src/lib/libc/db/mpool/mpool.c (revision b725ae7711052a2233e31a66fefb8a752c388d7a)
1 /*	$OpenBSD: mpool.c,v 1.12 2003/06/02 20:18:34 millert Exp $	*/
2 
3 /*-
4  * Copyright (c) 1990, 1993, 1994
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 #if defined(LIBC_SCCS) && !defined(lint)
33 #if 0
34 static char sccsid[] = "@(#)mpool.c	8.7 (Berkeley) 11/2/95";
35 #else
36 static const char rcsid[] = "$OpenBSD: mpool.c,v 1.12 2003/06/02 20:18:34 millert Exp $";
37 #endif
38 #endif /* LIBC_SCCS and not lint */
39 
40 #include <sys/param.h>
41 #include <sys/queue.h>
42 #include <sys/stat.h>
43 
44 #include <errno.h>
45 #include <stdio.h>
46 #include <stdlib.h>
47 #include <string.h>
48 #include <unistd.h>
49 
50 #include <db.h>
51 
52 #define	__MPOOLINTERFACE_PRIVATE
53 #include <mpool.h>
54 
55 static BKT *mpool_bkt(MPOOL *);
56 static BKT *mpool_look(MPOOL *, pgno_t);
57 static int  mpool_write(MPOOL *, BKT *);
58 
59 /*
60  * mpool_open --
61  *	Initialize a memory pool.
62  */
63 /* ARGSUSED */
64 MPOOL *
65 mpool_open(key, fd, pagesize, maxcache)
66 	void *key;
67 	int fd;
68 	pgno_t pagesize, maxcache;
69 {
70 	struct stat sb;
71 	MPOOL *mp;
72 	int entry;
73 
74 	/*
75 	 * Get information about the file.
76 	 *
77 	 * XXX
78 	 * We don't currently handle pipes, although we should.
79 	 */
80 	if (fstat(fd, &sb))
81 		return (NULL);
82 	if (!S_ISREG(sb.st_mode)) {
83 		errno = ESPIPE;
84 		return (NULL);
85 	}
86 
87 	/* Allocate and initialize the MPOOL cookie. */
88 	if ((mp = (MPOOL *)calloc(1, sizeof(MPOOL))) == NULL)
89 		return (NULL);
90 	CIRCLEQ_INIT(&mp->lqh);
91 	for (entry = 0; entry < HASHSIZE; ++entry)
92 		CIRCLEQ_INIT(&mp->hqh[entry]);
93 	mp->maxcache = maxcache;
94 	mp->npages = sb.st_size / pagesize;
95 	mp->pagesize = pagesize;
96 	mp->fd = fd;
97 	return (mp);
98 }
99 
100 /*
101  * mpool_filter --
102  *	Initialize input/output filters.
103  */
104 void
105 mpool_filter(mp, pgin, pgout, pgcookie)
106 	MPOOL *mp;
107 	void (*pgin)(void *, pgno_t, void *);
108 	void (*pgout)(void *, pgno_t, void *);
109 	void *pgcookie;
110 {
111 	mp->pgin = pgin;
112 	mp->pgout = pgout;
113 	mp->pgcookie = pgcookie;
114 }
115 
116 /*
117  * mpool_new --
118  *	Get a new page of memory.
119  */
120 void *
121 mpool_new(mp, pgnoaddr, flags)
122 	MPOOL *mp;
123 	pgno_t *pgnoaddr;
124 	u_int flags;
125 {
126 	struct _hqh *head;
127 	BKT *bp;
128 
129 	if (mp->npages == MAX_PAGE_NUMBER) {
130 		(void)fprintf(stderr, "mpool_new: page allocation overflow.\n");
131 		abort();
132 	}
133 #ifdef STATISTICS
134 	++mp->pagenew;
135 #endif
136 	/*
137 	 * Get a BKT from the cache.  Assign a new page number, attach
138 	 * it to the head of the hash chain, the tail of the lru chain,
139 	 * and return.
140 	 */
141 	if ((bp = mpool_bkt(mp)) == NULL)
142 		return (NULL);
143 	if (flags == MPOOL_PAGE_REQUEST) {
144 		mp->npages++;
145 		bp->pgno = *pgnoaddr;
146 	} else
147 		bp->pgno = *pgnoaddr = mp->npages++;
148 
149 	bp->flags = MPOOL_PINNED | MPOOL_INUSE;
150 
151 	head = &mp->hqh[HASHKEY(bp->pgno)];
152 	CIRCLEQ_INSERT_HEAD(head, bp, hq);
153 	CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q);
154 	return (bp->page);
155 }
156 
157 int
158 mpool_delete(mp, page)
159 	MPOOL *mp;
160 	void *page;
161 {
162 	struct _hqh *head;
163 	BKT *bp;
164 
165 	bp = (BKT *)((char *)page - sizeof(BKT));
166 
167 #ifdef DEBUG
168 	if (!(bp->flags & MPOOL_PINNED)) {
169 		(void)fprintf(stderr,
170 		    "mpool_delete: page %d not pinned\n", bp->pgno);
171 		abort();
172 	}
173 #endif
174 
175 	/* Remove from the hash and lru queues. */
176 	head = &mp->hqh[HASHKEY(bp->pgno)];
177 	CIRCLEQ_REMOVE(head, bp, hq);
178 	CIRCLEQ_REMOVE(&mp->lqh, bp, q);
179 
180 	free(bp);
181 	return (RET_SUCCESS);
182 }
183 
184 /*
185  * mpool_get
186  *	Get a page.
187  */
188 /* ARGSUSED */
189 void *
190 mpool_get(mp, pgno, flags)
191 	MPOOL *mp;
192 	pgno_t pgno;
193 	u_int flags;				/* XXX not used? */
194 {
195 	struct _hqh *head;
196 	BKT *bp;
197 	off_t off;
198 	int nr;
199 
200 #ifdef STATISTICS
201 	++mp->pageget;
202 #endif
203 
204 	/* Check for a page that is cached. */
205 	if ((bp = mpool_look(mp, pgno)) != NULL) {
206 #ifdef DEBUG
207 		if (!(flags & MPOOL_IGNOREPIN) && bp->flags & MPOOL_PINNED) {
208 			(void)fprintf(stderr,
209 			    "mpool_get: page %d already pinned\n", bp->pgno);
210 			abort();
211 		}
212 #endif
213 		/*
214 		 * Move the page to the head of the hash chain and the tail
215 		 * of the lru chain.
216 		 */
217 		head = &mp->hqh[HASHKEY(bp->pgno)];
218 		CIRCLEQ_REMOVE(head, bp, hq);
219 		CIRCLEQ_INSERT_HEAD(head, bp, hq);
220 		CIRCLEQ_REMOVE(&mp->lqh, bp, q);
221 		CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q);
222 
223 		/* Return a pinned page. */
224 		bp->flags |= MPOOL_PINNED;
225 		return (bp->page);
226 	}
227 
228 	/* Get a page from the cache. */
229 	if ((bp = mpool_bkt(mp)) == NULL)
230 		return (NULL);
231 
232 	/* Read in the contents. */
233 #ifdef STATISTICS
234 	++mp->pageread;
235 #endif
236 	off = mp->pagesize * pgno;
237 	if ((nr = pread(mp->fd, bp->page, mp->pagesize, off)) != mp->pagesize) {
238 		switch (nr) {
239 		case -1:
240 			/* errno is set for us by pread(). */
241 			return (NULL);
242 		case 0:
243 			/*
244 			 * A zero-length read means you need to create a
245 			 * new page.
246 			 */
247 			memset(bp->page, 0, mp->pagesize);
248 		default:
249 			/* A partial read is definitely bad. */
250 			errno = EINVAL;
251 			return (NULL);
252 		}
253 	}
254 
255 	/* Set the page number, pin the page. */
256 	bp->pgno = pgno;
257 	if (!(flags & MPOOL_IGNOREPIN))
258 		bp->flags = MPOOL_PINNED;
259 	bp->flags |= MPOOL_INUSE;
260 
261 	/*
262 	 * Add the page to the head of the hash chain and the tail
263 	 * of the lru chain.
264 	 */
265 	head = &mp->hqh[HASHKEY(bp->pgno)];
266 	CIRCLEQ_INSERT_HEAD(head, bp, hq);
267 	CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q);
268 
269 	/* Run through the user's filter. */
270 	if (mp->pgin != NULL)
271 		(mp->pgin)(mp->pgcookie, bp->pgno, bp->page);
272 
273 	return (bp->page);
274 }
275 
276 /*
277  * mpool_put
278  *	Return a page.
279  */
280 /* ARGSUSED */
281 int
282 mpool_put(mp, page, flags)
283 	MPOOL *mp;
284 	void *page;
285 	u_int flags;
286 {
287 	BKT *bp;
288 
289 #ifdef STATISTICS
290 	++mp->pageput;
291 #endif
292 	bp = (BKT *)((char *)page - sizeof(BKT));
293 #ifdef DEBUG
294 	if (!(bp->flags & MPOOL_PINNED)) {
295 		(void)fprintf(stderr,
296 		    "mpool_put: page %d not pinned\n", bp->pgno);
297 		abort();
298 	}
299 #endif
300 	bp->flags &= ~MPOOL_PINNED;
301 	if (flags & MPOOL_DIRTY)
302 		bp->flags |= flags & MPOOL_DIRTY;
303 	return (RET_SUCCESS);
304 }
305 
306 /*
307  * mpool_close
308  *	Close the buffer pool.
309  */
310 int
311 mpool_close(mp)
312 	MPOOL *mp;
313 {
314 	BKT *bp;
315 
316 	/* Free up any space allocated to the lru pages. */
317 	while ((bp = mp->lqh.cqh_first) != (void *)&mp->lqh) {
318 		CIRCLEQ_REMOVE(&mp->lqh, mp->lqh.cqh_first, q);
319 		free(bp);
320 	}
321 
322 	/* Free the MPOOL cookie. */
323 	free(mp);
324 	return (RET_SUCCESS);
325 }
326 
327 /*
328  * mpool_sync
329  *	Sync the pool to disk.
330  */
331 int
332 mpool_sync(mp)
333 	MPOOL *mp;
334 {
335 	BKT *bp;
336 
337 	/* Walk the lru chain, flushing any dirty pages to disk. */
338 	for (bp = mp->lqh.cqh_first;
339 	    bp != (void *)&mp->lqh; bp = bp->q.cqe_next)
340 		if (bp->flags & MPOOL_DIRTY &&
341 		    mpool_write(mp, bp) == RET_ERROR)
342 			return (RET_ERROR);
343 
344 	/* Sync the file descriptor. */
345 	return (fsync(mp->fd) ? RET_ERROR : RET_SUCCESS);
346 }
347 
348 /*
349  * mpool_bkt
350  *	Get a page from the cache (or create one).
351  */
352 static BKT *
353 mpool_bkt(mp)
354 	MPOOL *mp;
355 {
356 	struct _hqh *head;
357 	BKT *bp;
358 
359 	/* If under the max cached, always create a new page. */
360 	if (mp->curcache < mp->maxcache)
361 		goto new;
362 
363 	/*
364 	 * If the cache is max'd out, walk the lru list for a buffer we
365 	 * can flush.  If we find one, write it (if necessary) and take it
366 	 * off any lists.  If we don't find anything we grow the cache anyway.
367 	 * The cache never shrinks.
368 	 */
369 	for (bp = mp->lqh.cqh_first;
370 	    bp != (void *)&mp->lqh; bp = bp->q.cqe_next)
371 		if (!(bp->flags & MPOOL_PINNED)) {
372 			/* Flush if dirty. */
373 			if (bp->flags & MPOOL_DIRTY &&
374 			    mpool_write(mp, bp) == RET_ERROR)
375 				return (NULL);
376 #ifdef STATISTICS
377 			++mp->pageflush;
378 #endif
379 			/* Remove from the hash and lru queues. */
380 			head = &mp->hqh[HASHKEY(bp->pgno)];
381 			CIRCLEQ_REMOVE(head, bp, hq);
382 			CIRCLEQ_REMOVE(&mp->lqh, bp, q);
383 #ifdef DEBUG
384 			{ void *spage;
385 				spage = bp->page;
386 				memset(bp, 0xff, sizeof(BKT) + mp->pagesize);
387 				bp->page = spage;
388 			}
389 #endif
390 			bp->flags = 0;
391 			return (bp);
392 		}
393 
394 new:	if ((bp = (BKT *)malloc(sizeof(BKT) + mp->pagesize)) == NULL)
395 		return (NULL);
396 #ifdef STATISTICS
397 	++mp->pagealloc;
398 #endif
399 	memset(bp, 0xff, sizeof(BKT) + mp->pagesize);
400 	bp->page = (char *)bp + sizeof(BKT);
401 	bp->flags = 0;
402 	++mp->curcache;
403 	return (bp);
404 }
405 
406 /*
407  * mpool_write
408  *	Write a page to disk.
409  */
410 static int
411 mpool_write(mp, bp)
412 	MPOOL *mp;
413 	BKT *bp;
414 {
415 	off_t off;
416 
417 #ifdef STATISTICS
418 	++mp->pagewrite;
419 #endif
420 
421 	/* Run through the user's filter. */
422 	if (mp->pgout)
423 		(mp->pgout)(mp->pgcookie, bp->pgno, bp->page);
424 
425 	off = mp->pagesize * bp->pgno;
426 	if (pwrite(mp->fd, bp->page, mp->pagesize, off) != mp->pagesize)
427 		return (RET_ERROR);
428 
429 	/*
430 	 * Re-run through the input filter since this page may soon be
431 	 * accessed via the cache, and whatever the user's output filter
432 	 * did may screw things up if we don't let the input filter
433 	 * restore the in-core copy.
434 	 */
435 	if (mp->pgin)
436 		(mp->pgin)(mp->pgcookie, bp->pgno, bp->page);
437 
438 	bp->flags &= ~MPOOL_DIRTY;
439 	return (RET_SUCCESS);
440 }
441 
442 /*
443  * mpool_look
444  *	Lookup a page in the cache.
445  */
446 static BKT *
447 mpool_look(mp, pgno)
448 	MPOOL *mp;
449 	pgno_t pgno;
450 {
451 	struct _hqh *head;
452 	BKT *bp;
453 
454 	head = &mp->hqh[HASHKEY(pgno)];
455 	for (bp = head->cqh_first; bp != (void *)head; bp = bp->hq.cqe_next)
456 		if ((bp->pgno == pgno) &&
457 			((bp->flags & MPOOL_INUSE) == MPOOL_INUSE)) {
458 #ifdef STATISTICS
459 			++mp->cachehit;
460 #endif
461 			return (bp);
462 		}
463 #ifdef STATISTICS
464 	++mp->cachemiss;
465 #endif
466 	return (NULL);
467 }
468 
#ifdef STATISTICS
/*
 * mpool_stat --
 *	Print cache statistics to stderr.
 *
 *	The counters are printed first, then one entry per buffer on the
 *	lru chain, ten to a line: the page number followed by "d" if the
 *	buffer is dirty and "P" if it is pinned.
 *
 *	Every integer argument is cast to unsigned long so that it matches
 *	its %lu conversion whatever the width of the underlying field.
 */
void
mpool_stat(mp)
	MPOOL *mp;
{
	BKT *bp;
	int cnt;
	const char *sep;

	(void)fprintf(stderr, "%lu pages in the file\n",
	    (unsigned long)mp->npages);
	(void)fprintf(stderr,
	    "page size %lu, cacheing %lu pages of %lu page max cache\n",
	    (unsigned long)mp->pagesize, (unsigned long)mp->curcache,
	    (unsigned long)mp->maxcache);
	(void)fprintf(stderr, "%lu page puts, %lu page gets, %lu page new\n",
	    (unsigned long)mp->pageput, (unsigned long)mp->pageget,
	    (unsigned long)mp->pagenew);
	(void)fprintf(stderr, "%lu page allocs, %lu page flushes\n",
	    (unsigned long)mp->pagealloc, (unsigned long)mp->pageflush);
	/* Skip the hit rate when there were no lookups (avoids 0/0). */
	if (mp->cachehit + mp->cachemiss)
		(void)fprintf(stderr,
		    "%.0f%% cache hit rate (%lu hits, %lu misses)\n",
		    ((double)mp->cachehit / (mp->cachehit + mp->cachemiss))
		    * 100, (unsigned long)mp->cachehit,
		    (unsigned long)mp->cachemiss);
	(void)fprintf(stderr, "%lu page reads, %lu page writes\n",
	    (unsigned long)mp->pageread, (unsigned long)mp->pagewrite);

	sep = "";
	cnt = 0;
	for (bp = mp->lqh.cqh_first;
	    bp != (void *)&mp->lqh; bp = bp->q.cqe_next) {
		/* Page numbers are unsigned; print them as such. */
		(void)fprintf(stderr, "%s%lu", sep, (unsigned long)bp->pgno);
		if (bp->flags & MPOOL_DIRTY)
			(void)fprintf(stderr, "d");
		if (bp->flags & MPOOL_PINNED)
			(void)fprintf(stderr, "P");
		/* Start a new output line after every tenth entry. */
		if (++cnt == 10) {
			sep = "\n";
			cnt = 0;
		} else
			sep = ", ";

	}
	(void)fprintf(stderr, "\n");
}
#endif
517