xref: /netbsd-src/lib/libc/db/mpool/mpool.c (revision e9d867ef5010fbab8d48045c13025636f5cd7479)
1 /*-
2  * Copyright (c) 1990, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  */
33 
34 #if defined(LIBC_SCCS) && !defined(lint)
35 /*static char sccsid[] = "from: @(#)mpool.c	8.1 (Berkeley) 6/6/93";*/
36 static char rcsid[] = "$Id: mpool.c,v 1.2 1993/08/01 18:43:20 mycroft Exp $";
37 #endif /* LIBC_SCCS and not lint */
38 
39 #include <sys/param.h>
40 #include <sys/stat.h>
41 
42 #include <errno.h>
43 #include <stdio.h>
44 #include <stdlib.h>
45 #include <string.h>
46 #include <unistd.h>
47 
48 #include <db.h>
49 #define	__MPOOLINTERFACE_PRIVATE
50 #include "mpool.h"
51 
52 static BKT *mpool_bkt __P((MPOOL *));
53 static BKT *mpool_look __P((MPOOL *, pgno_t));
54 static int  mpool_write __P((MPOOL *, BKT *));
55 #ifdef DEBUG
56 static void __mpoolerr __P((const char *fmt, ...));
57 #endif
58 
59 /*
60  * MPOOL_OPEN -- initialize a memory pool.
61  *
62  * Parameters:
63  *	key:		Shared buffer key.
64  *	fd:		File descriptor.
65  *	pagesize:	File page size.
66  *	maxcache:	Max number of cached pages.
67  *
68  * Returns:
69  *	MPOOL pointer, NULL on error.
70  */
71 MPOOL *
72 mpool_open(key, fd, pagesize, maxcache)
73 	DBT *key;
74 	int fd;
75 	pgno_t pagesize, maxcache;
76 {
77 	struct stat sb;
78 	MPOOL *mp;
79 	int entry;
80 
81 	if (fstat(fd, &sb))
82 		return (NULL);
83 	/* XXX
84 	 * We should only set st_size to 0 for pipes -- 4.4BSD has the fix so
85 	 * that stat(2) returns true for ISSOCK on pipes.  Until then, this is
86 	 * fairly close.
87 	 */
88 	if (!S_ISREG(sb.st_mode)) {
89 		errno = ESPIPE;
90 		return (NULL);
91 	}
92 
93 	if ((mp = malloc(sizeof(MPOOL))) == NULL)
94 		return (NULL);
95 	mp->free.cnext = mp->free.cprev = (BKT *)&mp->free;
96 	mp->lru.cnext = mp->lru.cprev = (BKT *)&mp->lru;
97 	for (entry = 0; entry < HASHSIZE; ++entry)
98 		mp->hashtable[entry].hnext = mp->hashtable[entry].hprev =
99 		    mp->hashtable[entry].cnext = mp->hashtable[entry].cprev =
100 		    (BKT *)&mp->hashtable[entry];
101 	mp->curcache = 0;
102 	mp->maxcache = maxcache;
103 	mp->pagesize = pagesize;
104 	mp->npages = sb.st_size / pagesize;
105 	mp->fd = fd;
106 	mp->pgcookie = NULL;
107 	mp->pgin = mp->pgout = NULL;
108 
109 #ifdef STATISTICS
110 	mp->cachehit = mp->cachemiss = mp->pagealloc = mp->pageflush =
111 	    mp->pageget = mp->pagenew = mp->pageput = mp->pageread =
112 	    mp->pagewrite = 0;
113 #endif
114 	return (mp);
115 }
116 
117 /*
118  * MPOOL_FILTER -- initialize input/output filters.
119  *
120  * Parameters:
121  *	pgin:		Page in conversion routine.
122  *	pgout:		Page out conversion routine.
123  *	pgcookie:	Cookie for page in/out routines.
124  */
125 void
126 mpool_filter(mp, pgin, pgout, pgcookie)
127 	MPOOL *mp;
128 	void (*pgin) __P((void *, pgno_t, void *));
129 	void (*pgout) __P((void *, pgno_t, void *));
130 	void *pgcookie;
131 {
132 	mp->pgin = pgin;
133 	mp->pgout = pgout;
134 	mp->pgcookie = pgcookie;
135 }
136 
137 /*
138  * MPOOL_NEW -- get a new page
139  *
140  * Parameters:
141  *	mp:		mpool cookie
142  *	pgnoadddr:	place to store new page number
143  * Returns:
144  *	RET_ERROR, RET_SUCCESS
145  */
146 void *
147 mpool_new(mp, pgnoaddr)
148 	MPOOL *mp;
149 	pgno_t *pgnoaddr;
150 {
151 	BKT *b;
152 	BKTHDR *hp;
153 
154 #ifdef STATISTICS
155 	++mp->pagenew;
156 #endif
157 	/*
158 	 * Get a BKT from the cache.  Assign a new page number, attach it to
159 	 * the hash and lru chains and return.
160 	 */
161 	if ((b = mpool_bkt(mp)) == NULL)
162 		return (NULL);
163 	*pgnoaddr = b->pgno = mp->npages++;
164 	b->flags = MPOOL_PINNED;
165 	inshash(b, b->pgno);
166 	inschain(b, &mp->lru);
167 	return (b->page);
168 }
169 
170 /*
171  * MPOOL_GET -- get a page from the pool
172  *
173  * Parameters:
174  *	mp:	mpool cookie
175  *	pgno:	page number
176  *	flags:	not used
177  *
178  * Returns:
179  *	RET_ERROR, RET_SUCCESS
180  */
181 void *
182 mpool_get(mp, pgno, flags)
183 	MPOOL *mp;
184 	pgno_t pgno;
185 	u_int flags;		/* XXX not used? */
186 {
187 	BKT *b;
188 	BKTHDR *hp;
189 	off_t off;
190 	int nr;
191 
192 	/*
193 	 * If asking for a specific page that is already in the cache, find
194 	 * it and return it.
195 	 */
196 	if (b = mpool_look(mp, pgno)) {
197 #ifdef STATISTICS
198 		++mp->pageget;
199 #endif
200 #ifdef DEBUG
201 		if (b->flags & MPOOL_PINNED)
202 			__mpoolerr("mpool_get: page %d already pinned",
203 			    b->pgno);
204 #endif
205 		rmchain(b);
206 		inschain(b, &mp->lru);
207 		b->flags |= MPOOL_PINNED;
208 		return (b->page);
209 	}
210 
211 	/* Not allowed to retrieve a non-existent page. */
212 	if (pgno >= mp->npages) {
213 		errno = EINVAL;
214 		return (NULL);
215 	}
216 
217 	/* Get a page from the cache. */
218 	if ((b = mpool_bkt(mp)) == NULL)
219 		return (NULL);
220 	b->pgno = pgno;
221 	b->flags = MPOOL_PINNED;
222 
223 #ifdef STATISTICS
224 	++mp->pageread;
225 #endif
226 	/* Read in the contents. */
227 	off = mp->pagesize * pgno;
228 	if (lseek(mp->fd, off, SEEK_SET) != off)
229 		return (NULL);
230 	if ((nr = read(mp->fd, b->page, mp->pagesize)) != mp->pagesize) {
231 		if (nr >= 0)
232 			errno = EFTYPE;
233 		return (NULL);
234 	}
235 	if (mp->pgin)
236 		(mp->pgin)(mp->pgcookie, b->pgno, b->page);
237 
238 	inshash(b, b->pgno);
239 	inschain(b, &mp->lru);
240 #ifdef STATISTICS
241 	++mp->pageget;
242 #endif
243 	return (b->page);
244 }
245 
246 /*
247  * MPOOL_PUT -- return a page to the pool
248  *
249  * Parameters:
250  *	mp:	mpool cookie
251  *	page:	page pointer
252  *	pgno:	page number
253  *
254  * Returns:
255  *	RET_ERROR, RET_SUCCESS
256  */
257 int
258 mpool_put(mp, page, flags)
259 	MPOOL *mp;
260 	void *page;
261 	u_int flags;
262 {
263 	BKT *baddr;
264 #ifdef DEBUG
265 	BKT *b;
266 #endif
267 
268 #ifdef STATISTICS
269 	++mp->pageput;
270 #endif
271 	baddr = (BKT *)((char *)page - sizeof(BKT));
272 #ifdef DEBUG
273 	if (!(baddr->flags & MPOOL_PINNED))
274 		__mpoolerr("mpool_put: page %d not pinned", b->pgno);
275 	for (b = mp->lru.cnext; b != (BKT *)&mp->lru; b = b->cnext) {
276 		if (b == (BKT *)&mp->lru)
277 			__mpoolerr("mpool_put: %0x: bad address", baddr);
278 		if (b == baddr)
279 			break;
280 	}
281 #endif
282 	baddr->flags &= ~MPOOL_PINNED;
283 	baddr->flags |= flags & MPOOL_DIRTY;
284 	return (RET_SUCCESS);
285 }
286 
287 /*
288  * MPOOL_CLOSE -- close the buffer pool
289  *
290  * Parameters:
291  *	mp:	mpool cookie
292  *
293  * Returns:
294  *	RET_ERROR, RET_SUCCESS
295  */
296 int
297 mpool_close(mp)
298 	MPOOL *mp;
299 {
300 	BKT *b, *next;
301 
302 	/* Free up any space allocated to the lru pages. */
303 	for (b = mp->lru.cprev; b != (BKT *)&mp->lru; b = next) {
304 		next = b->cprev;
305 		free(b);
306 	}
307 	free(mp);
308 	return (RET_SUCCESS);
309 }
310 
311 /*
312  * MPOOL_SYNC -- sync the file to disk.
313  *
314  * Parameters:
315  *	mp:	mpool cookie
316  *
317  * Returns:
318  *	RET_ERROR, RET_SUCCESS
319  */
320 int
321 mpool_sync(mp)
322 	MPOOL *mp;
323 {
324 	BKT *b;
325 
326 	for (b = mp->lru.cprev; b != (BKT *)&mp->lru; b = b->cprev)
327 		if (b->flags & MPOOL_DIRTY && mpool_write(mp, b) == RET_ERROR)
328 			return (RET_ERROR);
329 	return (fsync(mp->fd) ? RET_ERROR : RET_SUCCESS);
330 }
331 
332 /*
333  * MPOOL_BKT -- get/create a BKT from the cache
334  *
335  * Parameters:
336  *	mp:	mpool cookie
337  *
338  * Returns:
339  *	NULL on failure and a pointer to the BKT on success
340  */
341 static BKT *
342 mpool_bkt(mp)
343 	MPOOL *mp;
344 {
345 	BKT *b;
346 
347 	if (mp->curcache < mp->maxcache)
348 		goto new;
349 
350 	/*
351 	 * If the cache is maxxed out, search the lru list for a buffer we
352 	 * can flush.  If we find one, write it if necessary and take it off
353 	 * any lists.  If we don't find anything we grow the cache anyway.
354 	 * The cache never shrinks.
355 	 */
356 	for (b = mp->lru.cprev; b != (BKT *)&mp->lru; b = b->cprev)
357 		if (!(b->flags & MPOOL_PINNED)) {
358 			if (b->flags & MPOOL_DIRTY &&
359 			    mpool_write(mp, b) == RET_ERROR)
360 				return (NULL);
361 			rmhash(b);
362 			rmchain(b);
363 #ifdef STATISTICS
364 			++mp->pageflush;
365 #endif
366 #ifdef DEBUG
367 			{
368 				void *spage;
369 				spage = b->page;
370 				memset(b, 0xff, sizeof(BKT) + mp->pagesize);
371 				b->page = spage;
372 			}
373 #endif
374 			return (b);
375 		}
376 
377 new:	if ((b = malloc(sizeof(BKT) + mp->pagesize)) == NULL)
378 		return (NULL);
379 #ifdef STATISTICS
380 	++mp->pagealloc;
381 #endif
382 #ifdef DEBUG
383 	memset(b, 0xff, sizeof(BKT) + mp->pagesize);
384 #endif
385 	b->page = (char *)b + sizeof(BKT);
386 	++mp->curcache;
387 	return (b);
388 }
389 
390 /*
391  * MPOOL_WRITE -- sync a page to disk
392  *
393  * Parameters:
394  *	mp:	mpool cookie
395  *
396  * Returns:
397  *	RET_ERROR, RET_SUCCESS
398  */
399 static int
400 mpool_write(mp, b)
401 	MPOOL *mp;
402 	BKT *b;
403 {
404 	off_t off;
405 
406 	if (mp->pgout)
407 		(mp->pgout)(mp->pgcookie, b->pgno, b->page);
408 
409 #ifdef STATISTICS
410 	++mp->pagewrite;
411 #endif
412 	off = mp->pagesize * b->pgno;
413 	if (lseek(mp->fd, off, SEEK_SET) != off)
414 		return (RET_ERROR);
415 	if (write(mp->fd, b->page, mp->pagesize) != mp->pagesize)
416 		return (RET_ERROR);
417 	b->flags &= ~MPOOL_DIRTY;
418 	return (RET_SUCCESS);
419 }
420 
421 /*
422  * MPOOL_LOOK -- lookup a page
423  *
424  * Parameters:
425  *	mp:	mpool cookie
426  *	pgno:	page number
427  *
428  * Returns:
429  *	NULL on failure and a pointer to the BKT on success
430  */
431 static BKT *
432 mpool_look(mp, pgno)
433 	MPOOL *mp;
434 	pgno_t pgno;
435 {
436 	register BKT *b;
437 	register BKTHDR *tb;
438 
439 	/* XXX
440 	 * If find the buffer, put it first on the hash chain so can
441 	 * find it again quickly.
442 	 */
443 	tb = &mp->hashtable[HASHKEY(pgno)];
444 	for (b = tb->hnext; b != (BKT *)tb; b = b->hnext)
445 		if (b->pgno == pgno) {
446 #ifdef STATISTICS
447 			++mp->cachehit;
448 #endif
449 			return (b);
450 		}
451 #ifdef STATISTICS
452 	++mp->cachemiss;
453 #endif
454 	return (NULL);
455 }
456 
457 #ifdef STATISTICS
458 /*
459  * MPOOL_STAT -- cache statistics
460  *
461  * Parameters:
462  *	mp:	mpool cookie
463  */
464 void
465 mpool_stat(mp)
466 	MPOOL *mp;
467 {
468 	BKT *b;
469 	int cnt;
470 	char *sep;
471 
472 	(void)fprintf(stderr, "%lu pages in the file\n", mp->npages);
473 	(void)fprintf(stderr,
474 	    "page size %lu, cacheing %lu pages of %lu page max cache\n",
475 	    mp->pagesize, mp->curcache, mp->maxcache);
476 	(void)fprintf(stderr, "%lu page puts, %lu page gets, %lu page new\n",
477 	    mp->pageput, mp->pageget, mp->pagenew);
478 	(void)fprintf(stderr, "%lu page allocs, %lu page flushes\n",
479 	    mp->pagealloc, mp->pageflush);
480 	if (mp->cachehit + mp->cachemiss)
481 		(void)fprintf(stderr,
482 		    "%.0f%% cache hit rate (%lu hits, %lu misses)\n",
483 		    ((double)mp->cachehit / (mp->cachehit + mp->cachemiss))
484 		    * 100, mp->cachehit, mp->cachemiss);
485 	(void)fprintf(stderr, "%lu page reads, %lu page writes\n",
486 	    mp->pageread, mp->pagewrite);
487 
488 	sep = "";
489 	cnt = 0;
490 	for (b = mp->lru.cnext; b != (BKT *)&mp->lru; b = b->cnext) {
491 		(void)fprintf(stderr, "%s%d", sep, b->pgno);
492 		if (b->flags & MPOOL_DIRTY)
493 			(void)fprintf(stderr, "d");
494 		if (b->flags & MPOOL_PINNED)
495 			(void)fprintf(stderr, "P");
496 		if (++cnt == 10) {
497 			sep = "\n";
498 			cnt = 0;
499 		} else
500 			sep = ", ";
501 
502 	}
503 	(void)fprintf(stderr, "\n");
504 }
505 #endif
506 
507 #ifdef DEBUG
508 #if __STDC__
509 #include <stdarg.h>
510 #else
511 #include <varargs.h>
512 #endif
513 
/*
 * __MPOOLERR -- report an internal inconsistency and abort.
 *
 * Parameters:
 *	fmt:	printf(3)-style format, followed by its arguments
 *
 * The formatted message and a newline go to stderr; the function then
 * calls abort() and does not return.
 */
static void
#if __STDC__
__mpoolerr(const char *fmt, ...)
#else
__mpoolerr(fmt, va_alist)
	char *fmt;
	va_dcl
#endif
{
	va_list args;

#if __STDC__
	va_start(args, fmt);
#else
	va_start(args);
#endif
	(void)vfprintf(stderr, fmt, args);
	va_end(args);
	(void)fputc('\n', stderr);
	abort();
	/* NOTREACHED */
}
535 #endif
536