1 /* $NetBSD: mpool.c,v 1.6 1996/05/03 21:29:48 cgd Exp $ */ 2 3 /*- 4 * Copyright (c) 1990, 1993, 1994 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed by the University of 18 * California, Berkeley and its contributors. 19 * 4. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 36 #if defined(LIBC_SCCS) && !defined(lint) 37 #if 0 38 static char sccsid[] = "@(#)mpool.c 8.5 (Berkeley) 7/26/94"; 39 #else 40 static char rcsid[] = "$NetBSD: mpool.c,v 1.6 1996/05/03 21:29:48 cgd Exp $"; 41 #endif 42 #endif /* LIBC_SCCS and not lint */ 43 44 #include <sys/param.h> 45 #include <sys/queue.h> 46 #include <sys/stat.h> 47 48 #include <errno.h> 49 #include <stdio.h> 50 #include <stdlib.h> 51 #include <string.h> 52 #include <unistd.h> 53 54 #include <db.h> 55 56 #define __MPOOLINTERFACE_PRIVATE 57 #include <mpool.h> 58 59 static BKT *mpool_bkt __P((MPOOL *)); 60 static BKT *mpool_look __P((MPOOL *, pgno_t)); 61 static int mpool_write __P((MPOOL *, BKT *)); 62 63 /* 64 * mpool_open -- 65 * Initialize a memory pool. 66 */ 67 MPOOL * 68 mpool_open(key, fd, pagesize, maxcache) 69 void *key; 70 int fd; 71 pgno_t pagesize, maxcache; 72 { 73 struct stat sb; 74 MPOOL *mp; 75 int entry; 76 77 /* 78 * Get information about the file. 79 * 80 * XXX 81 * We don't currently handle pipes, although we should. 82 */ 83 if (fstat(fd, &sb)) 84 return (NULL); 85 if (!S_ISREG(sb.st_mode)) { 86 errno = ESPIPE; 87 return (NULL); 88 } 89 90 /* Allocate and initialize the MPOOL cookie. */ 91 if ((mp = (MPOOL *)calloc(1, sizeof(MPOOL))) == NULL) 92 return (NULL); 93 CIRCLEQ_INIT(&mp->lqh); 94 for (entry = 0; entry < HASHSIZE; ++entry) 95 CIRCLEQ_INIT(&mp->hqh[entry]); 96 mp->maxcache = maxcache; 97 mp->npages = sb.st_size / pagesize; 98 mp->pagesize = pagesize; 99 mp->fd = fd; 100 return (mp); 101 } 102 103 /* 104 * mpool_filter -- 105 * Initialize input/output filters. 106 */ 107 void 108 mpool_filter(mp, pgin, pgout, pgcookie) 109 MPOOL *mp; 110 void (*pgin) __P((void *, pgno_t, void *)); 111 void (*pgout) __P((void *, pgno_t, void *)); 112 void *pgcookie; 113 { 114 mp->pgin = pgin; 115 mp->pgout = pgout; 116 mp->pgcookie = pgcookie; 117 } 118 119 /* 120 * mpool_new -- 121 * Get a new page of memory. 122 */ 123 void * 124 mpool_new(mp, pgnoaddr) 125 MPOOL *mp; 126 pgno_t *pgnoaddr; 127 { 128 struct _hqh *head; 129 BKT *bp; 130 131 if (mp->npages == MAX_PAGE_NUMBER) { 132 (void)fprintf(stderr, "mpool_new: page allocation overflow.\n"); 133 abort(); 134 } 135 #ifdef STATISTICS 136 ++mp->pagenew; 137 #endif 138 /* 139 * Get a BKT from the cache. Assign a new page number, attach 140 * it to the head of the hash chain, the tail of the lru chain, 141 * and return. 142 */ 143 if ((bp = mpool_bkt(mp)) == NULL) 144 return (NULL); 145 *pgnoaddr = bp->pgno = mp->npages++; 146 bp->flags = MPOOL_PINNED; 147 148 head = &mp->hqh[HASHKEY(bp->pgno)]; 149 CIRCLEQ_INSERT_HEAD(head, bp, hq); 150 CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q); 151 return (bp->page); 152 } 153 154 /* 155 * mpool_get 156 * Get a page. 157 */ 158 void * 159 mpool_get(mp, pgno, flags) 160 MPOOL *mp; 161 pgno_t pgno; 162 u_int flags; /* XXX not used? */ 163 { 164 struct _hqh *head; 165 BKT *bp; 166 off_t off; 167 int nr; 168 169 /* Check for attempt to retrieve a non-existent page. */ 170 if (pgno >= mp->npages) { 171 errno = EINVAL; 172 return (NULL); 173 } 174 175 #ifdef STATISTICS 176 ++mp->pageget; 177 #endif 178 179 /* Check for a page that is cached. */ 180 if ((bp = mpool_look(mp, pgno)) != NULL) { 181 #ifdef DEBUG 182 if (bp->flags & MPOOL_PINNED) { 183 (void)fprintf(stderr, 184 "mpool_get: page %d already pinned\n", bp->pgno); 185 abort(); 186 } 187 #endif 188 /* 189 * Move the page to the head of the hash chain and the tail 190 * of the lru chain. 191 */ 192 head = &mp->hqh[HASHKEY(bp->pgno)]; 193 CIRCLEQ_REMOVE(head, bp, hq); 194 CIRCLEQ_INSERT_HEAD(head, bp, hq); 195 CIRCLEQ_REMOVE(&mp->lqh, bp, q); 196 CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q); 197 198 /* Return a pinned page. */ 199 bp->flags |= MPOOL_PINNED; 200 return (bp->page); 201 } 202 203 /* Get a page from the cache. */ 204 if ((bp = mpool_bkt(mp)) == NULL) 205 return (NULL); 206 207 /* Read in the contents. */ 208 #ifdef STATISTICS 209 ++mp->pageread; 210 #endif 211 off = mp->pagesize * pgno; 212 if (lseek(mp->fd, off, SEEK_SET) != off) 213 return (NULL); 214 if ((nr = read(mp->fd, bp->page, mp->pagesize)) != mp->pagesize) { 215 if (nr >= 0) 216 errno = EFTYPE; 217 return (NULL); 218 } 219 220 /* Set the page number, pin the page. */ 221 bp->pgno = pgno; 222 bp->flags = MPOOL_PINNED; 223 224 /* 225 * Add the page to the head of the hash chain and the tail 226 * of the lru chain. 227 */ 228 head = &mp->hqh[HASHKEY(bp->pgno)]; 229 CIRCLEQ_INSERT_HEAD(head, bp, hq); 230 CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q); 231 232 /* Run through the user's filter. */ 233 if (mp->pgin != NULL) 234 (mp->pgin)(mp->pgcookie, bp->pgno, bp->page); 235 236 return (bp->page); 237 } 238 239 /* 240 * mpool_put 241 * Return a page. 242 */ 243 int 244 mpool_put(mp, page, flags) 245 MPOOL *mp; 246 void *page; 247 u_int flags; 248 { 249 BKT *bp; 250 251 #ifdef STATISTICS 252 ++mp->pageput; 253 #endif 254 bp = (BKT *)((char *)page - sizeof(BKT)); 255 #ifdef DEBUG 256 if (!(bp->flags & MPOOL_PINNED)) { 257 (void)fprintf(stderr, 258 "mpool_put: page %d not pinned\n", bp->pgno); 259 abort(); 260 } 261 #endif 262 bp->flags &= ~MPOOL_PINNED; 263 bp->flags |= flags & MPOOL_DIRTY; 264 return (RET_SUCCESS); 265 } 266 267 /* 268 * mpool_close 269 * Close the buffer pool. 270 */ 271 int 272 mpool_close(mp) 273 MPOOL *mp; 274 { 275 BKT *bp; 276 277 /* Free up any space allocated to the lru pages. */ 278 while ((bp = mp->lqh.cqh_first) != (void *)&mp->lqh) { 279 CIRCLEQ_REMOVE(&mp->lqh, mp->lqh.cqh_first, q); 280 free(bp); 281 } 282 283 /* Free the MPOOL cookie. */ 284 free(mp); 285 return (RET_SUCCESS); 286 } 287 288 /* 289 * mpool_sync 290 * Sync the pool to disk. 291 */ 292 int 293 mpool_sync(mp) 294 MPOOL *mp; 295 { 296 BKT *bp; 297 298 /* Walk the lru chain, flushing any dirty pages to disk. */ 299 for (bp = mp->lqh.cqh_first; 300 bp != (void *)&mp->lqh; bp = bp->q.cqe_next) 301 if (bp->flags & MPOOL_DIRTY && 302 mpool_write(mp, bp) == RET_ERROR) 303 return (RET_ERROR); 304 305 /* Sync the file descriptor. */ 306 return (fsync(mp->fd) ? RET_ERROR : RET_SUCCESS); 307 } 308 309 /* 310 * mpool_bkt 311 * Get a page from the cache (or create one). 312 */ 313 static BKT * 314 mpool_bkt(mp) 315 MPOOL *mp; 316 { 317 struct _hqh *head; 318 BKT *bp; 319 320 /* If under the max cached, always create a new page. */ 321 if (mp->curcache < mp->maxcache) 322 goto new; 323 324 /* 325 * If the cache is max'd out, walk the lru list for a buffer we 326 * can flush. If we find one, write it (if necessary) and take it 327 * off any lists. If we don't find anything we grow the cache anyway. 328 * The cache never shrinks. 329 */ 330 for (bp = mp->lqh.cqh_first; 331 bp != (void *)&mp->lqh; bp = bp->q.cqe_next) 332 if (!(bp->flags & MPOOL_PINNED)) { 333 /* Flush if dirty. */ 334 if (bp->flags & MPOOL_DIRTY && 335 mpool_write(mp, bp) == RET_ERROR) 336 return (NULL); 337 #ifdef STATISTICS 338 ++mp->pageflush; 339 #endif 340 /* Remove from the hash and lru queues. */ 341 head = &mp->hqh[HASHKEY(bp->pgno)]; 342 CIRCLEQ_REMOVE(head, bp, hq); 343 CIRCLEQ_REMOVE(&mp->lqh, bp, q); 344 #ifdef DEBUG 345 { void *spage; 346 spage = bp->page; 347 memset(bp, 0xff, sizeof(BKT) + mp->pagesize); 348 bp->page = spage; 349 } 350 #endif 351 return (bp); 352 } 353 354 new: if ((bp = (BKT *)malloc(sizeof(BKT) + mp->pagesize)) == NULL) 355 return (NULL); 356 #ifdef STATISTICS 357 ++mp->pagealloc; 358 #endif 359 #if defined(DEBUG) || defined(PURIFY) 360 memset(bp, 0xff, sizeof(BKT) + mp->pagesize); 361 #endif 362 bp->page = (char *)bp + sizeof(BKT); 363 ++mp->curcache; 364 return (bp); 365 } 366 367 /* 368 * mpool_write 369 * Write a page to disk. 370 */ 371 static int 372 mpool_write(mp, bp) 373 MPOOL *mp; 374 BKT *bp; 375 { 376 off_t off; 377 378 #ifdef STATISTICS 379 ++mp->pagewrite; 380 #endif 381 382 /* Run through the user's filter. */ 383 if (mp->pgout) 384 (mp->pgout)(mp->pgcookie, bp->pgno, bp->page); 385 386 off = mp->pagesize * bp->pgno; 387 if (lseek(mp->fd, off, SEEK_SET) != off) 388 return (RET_ERROR); 389 if (write(mp->fd, bp->page, mp->pagesize) != mp->pagesize) 390 return (RET_ERROR); 391 392 bp->flags &= ~MPOOL_DIRTY; 393 return (RET_SUCCESS); 394 } 395 396 /* 397 * mpool_look 398 * Lookup a page in the cache. 399 */ 400 static BKT * 401 mpool_look(mp, pgno) 402 MPOOL *mp; 403 pgno_t pgno; 404 { 405 struct _hqh *head; 406 BKT *bp; 407 408 head = &mp->hqh[HASHKEY(pgno)]; 409 for (bp = head->cqh_first; bp != (void *)head; bp = bp->hq.cqe_next) 410 if (bp->pgno == pgno) { 411 #ifdef STATISTICS 412 ++mp->cachehit; 413 #endif 414 return (bp); 415 } 416 #ifdef STATISTICS 417 ++mp->cachemiss; 418 #endif 419 return (NULL); 420 } 421 422 #ifdef STATISTICS 423 /* 424 * mpool_stat 425 * Print out cache statistics. 426 */ 427 void 428 mpool_stat(mp) 429 MPOOL *mp; 430 { 431 BKT *bp; 432 int cnt; 433 char *sep; 434 435 (void)fprintf(stderr, "%lu pages in the file\n", mp->npages); 436 (void)fprintf(stderr, 437 "page size %lu, cacheing %lu pages of %lu page max cache\n", 438 mp->pagesize, mp->curcache, mp->maxcache); 439 (void)fprintf(stderr, "%lu page puts, %lu page gets, %lu page new\n", 440 mp->pageput, mp->pageget, mp->pagenew); 441 (void)fprintf(stderr, "%lu page allocs, %lu page flushes\n", 442 mp->pagealloc, mp->pageflush); 443 if (mp->cachehit + mp->cachemiss) 444 (void)fprintf(stderr, 445 "%.0f%% cache hit rate (%lu hits, %lu misses)\n", 446 ((double)mp->cachehit / (mp->cachehit + mp->cachemiss)) 447 * 100, mp->cachehit, mp->cachemiss); 448 (void)fprintf(stderr, "%lu page reads, %lu page writes\n", 449 mp->pageread, mp->pagewrite); 450 451 sep = ""; 452 cnt = 0; 453 for (bp = mp->lqh.cqh_first; 454 bp != (void *)&mp->lqh; bp = bp->q.cqe_next) { 455 (void)fprintf(stderr, "%s%d", sep, bp->pgno); 456 if (bp->flags & MPOOL_DIRTY) 457 (void)fprintf(stderr, "d"); 458 if (bp->flags & MPOOL_PINNED) 459 (void)fprintf(stderr, "P"); 460 if (++cnt == 10) { 461 sep = "\n"; 462 cnt = 0; 463 } else 464 sep = ", "; 465 466 } 467 (void)fprintf(stderr, "\n"); 468 } 469 #endif 470