xref: /openbsd-src/lib/libc/db/mpool/mpool.c (revision cb39b41371628601fbe4c618205356d538b9d08a)
1 /*	$OpenBSD: mpool.c,v 1.20 2015/01/16 16:48:51 deraadt Exp $	*/
2 
3 /*-
4  * Copyright (c) 1990, 1993, 1994
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 #include <sys/queue.h>
33 #include <sys/stat.h>
34 
35 #include <errno.h>
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39 #include <unistd.h>
40 
41 #include <db.h>
42 
43 #define	__MPOOLINTERFACE_PRIVATE
44 #include <mpool.h>
45 
46 static BKT *mpool_bkt(MPOOL *);
47 static BKT *mpool_look(MPOOL *, pgno_t);
48 static int  mpool_write(MPOOL *, BKT *);
49 
50 /*
51  * mpool_open --
52  *	Initialize a memory pool.
53  */
54 /* ARGSUSED */
55 MPOOL *
56 mpool_open(void *key, int fd, pgno_t pagesize, pgno_t maxcache)
57 {
58 	struct stat sb;
59 	MPOOL *mp;
60 	int entry;
61 
62 	/*
63 	 * Get information about the file.
64 	 *
65 	 * XXX
66 	 * We don't currently handle pipes, although we should.
67 	 */
68 	if (fstat(fd, &sb))
69 		return (NULL);
70 	if (!S_ISREG(sb.st_mode)) {
71 		errno = ESPIPE;
72 		return (NULL);
73 	}
74 
75 	/* Allocate and initialize the MPOOL cookie. */
76 	if ((mp = (MPOOL *)calloc(1, sizeof(MPOOL))) == NULL)
77 		return (NULL);
78 	TAILQ_INIT(&mp->lqh);
79 	for (entry = 0; entry < HASHSIZE; ++entry)
80 		TAILQ_INIT(&mp->hqh[entry]);
81 	mp->maxcache = maxcache;
82 	mp->npages = sb.st_size / pagesize;
83 	mp->pagesize = pagesize;
84 	mp->fd = fd;
85 	return (mp);
86 }
87 
88 /*
89  * mpool_filter --
90  *	Initialize input/output filters.
91  */
92 void
93 mpool_filter(MPOOL *mp, void (*pgin) (void *, pgno_t, void *),
94     void (*pgout) (void *, pgno_t, void *), void *pgcookie)
95 {
96 	mp->pgin = pgin;
97 	mp->pgout = pgout;
98 	mp->pgcookie = pgcookie;
99 }
100 
101 /*
102  * mpool_new --
103  *	Get a new page of memory.
104  */
105 void *
106 mpool_new(MPOOL *mp, pgno_t *pgnoaddr, u_int flags)
107 {
108 	struct _hqh *head;
109 	BKT *bp;
110 
111 	if (mp->npages == MAX_PAGE_NUMBER) {
112 		(void)fprintf(stderr, "mpool_new: page allocation overflow.\n");
113 		abort();
114 	}
115 #ifdef STATISTICS
116 	++mp->pagenew;
117 #endif
118 	/*
119 	 * Get a BKT from the cache.  Assign a new page number, attach
120 	 * it to the head of the hash chain, the tail of the lru chain,
121 	 * and return.
122 	 */
123 	if ((bp = mpool_bkt(mp)) == NULL)
124 		return (NULL);
125 	if (flags == MPOOL_PAGE_REQUEST) {
126 		mp->npages++;
127 		bp->pgno = *pgnoaddr;
128 	} else
129 		bp->pgno = *pgnoaddr = mp->npages++;
130 
131 	bp->flags = MPOOL_PINNED | MPOOL_INUSE;
132 
133 	head = &mp->hqh[HASHKEY(bp->pgno)];
134 	TAILQ_INSERT_HEAD(head, bp, hq);
135 	TAILQ_INSERT_TAIL(&mp->lqh, bp, q);
136 	return (bp->page);
137 }
138 
139 int
140 mpool_delete(MPOOL *mp, void *page)
141 {
142 	struct _hqh *head;
143 	BKT *bp;
144 
145 	bp = (BKT *)((char *)page - sizeof(BKT));
146 
147 #ifdef DEBUG
148 	if (!(bp->flags & MPOOL_PINNED)) {
149 		(void)fprintf(stderr,
150 		    "mpool_delete: page %d not pinned\n", bp->pgno);
151 		abort();
152 	}
153 #endif
154 
155 	/* Remove from the hash and lru queues. */
156 	head = &mp->hqh[HASHKEY(bp->pgno)];
157 	TAILQ_REMOVE(head, bp, hq);
158 	TAILQ_REMOVE(&mp->lqh, bp, q);
159 
160 	free(bp);
161 	mp->curcache--;
162 	return (RET_SUCCESS);
163 }
164 
165 /*
166  * mpool_get
167  *	Get a page.
168  */
169 /* ARGSUSED */
170 void *
171 mpool_get(MPOOL *mp, pgno_t pgno,
172     u_int flags)		/* XXX not used? */
173 {
174 	struct _hqh *head;
175 	BKT *bp;
176 	off_t off;
177 	int nr;
178 
179 #ifdef STATISTICS
180 	++mp->pageget;
181 #endif
182 
183 	/* Check for a page that is cached. */
184 	if ((bp = mpool_look(mp, pgno)) != NULL) {
185 #ifdef DEBUG
186 		if (!(flags & MPOOL_IGNOREPIN) && bp->flags & MPOOL_PINNED) {
187 			(void)fprintf(stderr,
188 			    "mpool_get: page %d already pinned\n", bp->pgno);
189 			abort();
190 		}
191 #endif
192 		/*
193 		 * Move the page to the head of the hash chain and the tail
194 		 * of the lru chain.
195 		 */
196 		head = &mp->hqh[HASHKEY(bp->pgno)];
197 		TAILQ_REMOVE(head, bp, hq);
198 		TAILQ_INSERT_HEAD(head, bp, hq);
199 		TAILQ_REMOVE(&mp->lqh, bp, q);
200 		TAILQ_INSERT_TAIL(&mp->lqh, bp, q);
201 
202 		/* Return a pinned page. */
203 		bp->flags |= MPOOL_PINNED;
204 		return (bp->page);
205 	}
206 
207 	/* Get a page from the cache. */
208 	if ((bp = mpool_bkt(mp)) == NULL)
209 		return (NULL);
210 
211 	/* Read in the contents. */
212 	off = mp->pagesize * pgno;
213 	if ((nr = pread(mp->fd, bp->page, mp->pagesize, off)) != mp->pagesize) {
214 		switch (nr) {
215 		case -1:
216 			/* errno is set for us by pread(). */
217 			free(bp);
218 			mp->curcache--;
219 			return (NULL);
220 		case 0:
221 			/*
222 			 * A zero-length read means you need to create a
223 			 * new page.
224 			 */
225 			memset(bp->page, 0, mp->pagesize);
226 			break;
227 		default:
228 			/* A partial read is definitely bad. */
229 			free(bp);
230 			mp->curcache--;
231 			errno = EINVAL;
232 			return (NULL);
233 		}
234 	}
235 #ifdef STATISTICS
236 	++mp->pageread;
237 #endif
238 
239 	/* Set the page number, pin the page. */
240 	bp->pgno = pgno;
241 	if (!(flags & MPOOL_IGNOREPIN))
242 		bp->flags = MPOOL_PINNED;
243 	bp->flags |= MPOOL_INUSE;
244 
245 	/*
246 	 * Add the page to the head of the hash chain and the tail
247 	 * of the lru chain.
248 	 */
249 	head = &mp->hqh[HASHKEY(bp->pgno)];
250 	TAILQ_INSERT_HEAD(head, bp, hq);
251 	TAILQ_INSERT_TAIL(&mp->lqh, bp, q);
252 
253 	/* Run through the user's filter. */
254 	if (mp->pgin != NULL)
255 		(mp->pgin)(mp->pgcookie, bp->pgno, bp->page);
256 
257 	return (bp->page);
258 }
259 
260 /*
261  * mpool_put
262  *	Return a page.
263  */
264 /* ARGSUSED */
265 int
266 mpool_put(MPOOL *mp, void *page, u_int flags)
267 {
268 	BKT *bp;
269 
270 #ifdef STATISTICS
271 	++mp->pageput;
272 #endif
273 	bp = (BKT *)((char *)page - sizeof(BKT));
274 #ifdef DEBUG
275 	if (!(bp->flags & MPOOL_PINNED)) {
276 		(void)fprintf(stderr,
277 		    "mpool_put: page %d not pinned\n", bp->pgno);
278 		abort();
279 	}
280 #endif
281 	bp->flags &= ~MPOOL_PINNED;
282 	if (flags & MPOOL_DIRTY)
283 		bp->flags |= flags & MPOOL_DIRTY;
284 	return (RET_SUCCESS);
285 }
286 
287 /*
288  * mpool_close
289  *	Close the buffer pool.
290  */
291 int
292 mpool_close(MPOOL *mp)
293 {
294 	BKT *bp;
295 
296 	/* Free up any space allocated to the lru pages. */
297 	while ((bp = TAILQ_FIRST(&mp->lqh))) {
298 		TAILQ_REMOVE(&mp->lqh, bp, q);
299 		free(bp);
300 	}
301 
302 	/* Free the MPOOL cookie. */
303 	free(mp);
304 	return (RET_SUCCESS);
305 }
306 
307 /*
308  * mpool_sync
309  *	Sync the pool to disk.
310  */
311 int
312 mpool_sync(MPOOL *mp)
313 {
314 	BKT *bp;
315 
316 	/* Walk the lru chain, flushing any dirty pages to disk. */
317 	TAILQ_FOREACH(bp, &mp->lqh, q)
318 		if (bp->flags & MPOOL_DIRTY &&
319 		    mpool_write(mp, bp) == RET_ERROR)
320 			return (RET_ERROR);
321 
322 	/* Sync the file descriptor. */
323 	return (fsync(mp->fd) ? RET_ERROR : RET_SUCCESS);
324 }
325 
326 /*
327  * mpool_bkt
328  *	Get a page from the cache (or create one).
329  */
330 static BKT *
331 mpool_bkt(MPOOL *mp)
332 {
333 	struct _hqh *head;
334 	BKT *bp;
335 
336 	/* If under the max cached, always create a new page. */
337 	if (mp->curcache < mp->maxcache)
338 		goto new;
339 
340 	/*
341 	 * If the cache is max'd out, walk the lru list for a buffer we
342 	 * can flush.  If we find one, write it (if necessary) and take it
343 	 * off any lists.  If we don't find anything we grow the cache anyway.
344 	 * The cache never shrinks.
345 	 */
346 	TAILQ_FOREACH(bp, &mp->lqh, q)
347 		if (!(bp->flags & MPOOL_PINNED)) {
348 			/* Flush if dirty. */
349 			if (bp->flags & MPOOL_DIRTY &&
350 			    mpool_write(mp, bp) == RET_ERROR)
351 				return (NULL);
352 #ifdef STATISTICS
353 			++mp->pageflush;
354 #endif
355 			/* Remove from the hash and lru queues. */
356 			head = &mp->hqh[HASHKEY(bp->pgno)];
357 			TAILQ_REMOVE(head, bp, hq);
358 			TAILQ_REMOVE(&mp->lqh, bp, q);
359 #ifdef DEBUG
360 			{ void *spage;
361 				spage = bp->page;
362 				memset(bp, 0xff, sizeof(BKT) + mp->pagesize);
363 				bp->page = spage;
364 			}
365 #endif
366 			bp->flags = 0;
367 			return (bp);
368 		}
369 
370 new:	if ((bp = (BKT *)malloc(sizeof(BKT) + mp->pagesize)) == NULL)
371 		return (NULL);
372 #ifdef STATISTICS
373 	++mp->pagealloc;
374 #endif
375 	memset(bp, 0xff, sizeof(BKT) + mp->pagesize);
376 	bp->page = (char *)bp + sizeof(BKT);
377 	bp->flags = 0;
378 	++mp->curcache;
379 	return (bp);
380 }
381 
382 /*
383  * mpool_write
384  *	Write a page to disk.
385  */
386 static int
387 mpool_write(MPOOL *mp, BKT *bp)
388 {
389 	off_t off;
390 
391 #ifdef STATISTICS
392 	++mp->pagewrite;
393 #endif
394 
395 	/* Run through the user's filter. */
396 	if (mp->pgout)
397 		(mp->pgout)(mp->pgcookie, bp->pgno, bp->page);
398 
399 	off = mp->pagesize * bp->pgno;
400 	if (pwrite(mp->fd, bp->page, mp->pagesize, off) != mp->pagesize)
401 		return (RET_ERROR);
402 
403 	/*
404 	 * Re-run through the input filter since this page may soon be
405 	 * accessed via the cache, and whatever the user's output filter
406 	 * did may screw things up if we don't let the input filter
407 	 * restore the in-core copy.
408 	 */
409 	if (mp->pgin)
410 		(mp->pgin)(mp->pgcookie, bp->pgno, bp->page);
411 
412 	bp->flags &= ~MPOOL_DIRTY;
413 	return (RET_SUCCESS);
414 }
415 
416 /*
417  * mpool_look
418  *	Lookup a page in the cache.
419  */
420 static BKT *
421 mpool_look(MPOOL *mp, pgno_t pgno)
422 {
423 	struct _hqh *head;
424 	BKT *bp;
425 
426 	head = &mp->hqh[HASHKEY(pgno)];
427 	TAILQ_FOREACH(bp, head, hq)
428 		if ((bp->pgno == pgno) &&
429 			((bp->flags & MPOOL_INUSE) == MPOOL_INUSE)) {
430 #ifdef STATISTICS
431 			++mp->cachehit;
432 #endif
433 			return (bp);
434 		}
435 #ifdef STATISTICS
436 	++mp->cachemiss;
437 #endif
438 	return (NULL);
439 }
440 
441 #ifdef STATISTICS
442 /*
443  * mpool_stat
444  *	Print out cache statistics.
445  */
446 void
447 mpool_stat(MPOOL *mp)
448 {
449 	BKT *bp;
450 	int cnt;
451 	char *sep;
452 
453 	(void)fprintf(stderr, "%lu pages in the file\n", mp->npages);
454 	(void)fprintf(stderr,
455 	    "page size %lu, cacheing %lu pages of %lu page max cache\n",
456 	    mp->pagesize, mp->curcache, mp->maxcache);
457 	(void)fprintf(stderr, "%lu page puts, %lu page gets, %lu page new\n",
458 	    mp->pageput, mp->pageget, mp->pagenew);
459 	(void)fprintf(stderr, "%lu page allocs, %lu page flushes\n",
460 	    mp->pagealloc, mp->pageflush);
461 	if (mp->cachehit + mp->cachemiss)
462 		(void)fprintf(stderr,
463 		    "%.0f%% cache hit rate (%lu hits, %lu misses)\n",
464 		    ((double)mp->cachehit / (mp->cachehit + mp->cachemiss))
465 		    * 100, mp->cachehit, mp->cachemiss);
466 	(void)fprintf(stderr, "%lu page reads, %lu page writes\n",
467 	    mp->pageread, mp->pagewrite);
468 
469 	sep = "";
470 	cnt = 0;
471 	TAILQ_FOREACH(bp, &mp->lqh, q) {
472 		(void)fprintf(stderr, "%s%d", sep, bp->pgno);
473 		if (bp->flags & MPOOL_DIRTY)
474 			(void)fprintf(stderr, "d");
475 		if (bp->flags & MPOOL_PINNED)
476 			(void)fprintf(stderr, "P");
477 		if (++cnt == 10) {
478 			sep = "\n";
479 			cnt = 0;
480 		} else
481 			sep = ", ";
482 
483 	}
484 	(void)fprintf(stderr, "\n");
485 }
486 #endif
487