1 /* $OpenBSD: mpool.c,v 1.12 2003/06/02 20:18:34 millert Exp $ */ 2 3 /*- 4 * Copyright (c) 1990, 1993, 1994 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 #if defined(LIBC_SCCS) && !defined(lint) 33 #if 0 34 static char sccsid[] = "@(#)mpool.c 8.7 (Berkeley) 11/2/95"; 35 #else 36 static const char rcsid[] = "$OpenBSD: mpool.c,v 1.12 2003/06/02 20:18:34 millert Exp $"; 37 #endif 38 #endif /* LIBC_SCCS and not lint */ 39 40 #include <sys/param.h> 41 #include <sys/queue.h> 42 #include <sys/stat.h> 43 44 #include <errno.h> 45 #include <stdio.h> 46 #include <stdlib.h> 47 #include <string.h> 48 #include <unistd.h> 49 50 #include <db.h> 51 52 #define __MPOOLINTERFACE_PRIVATE 53 #include <mpool.h> 54 55 static BKT *mpool_bkt(MPOOL *); 56 static BKT *mpool_look(MPOOL *, pgno_t); 57 static int mpool_write(MPOOL *, BKT *); 58 59 /* 60 * mpool_open -- 61 * Initialize a memory pool. 62 */ 63 /* ARGSUSED */ 64 MPOOL * 65 mpool_open(key, fd, pagesize, maxcache) 66 void *key; 67 int fd; 68 pgno_t pagesize, maxcache; 69 { 70 struct stat sb; 71 MPOOL *mp; 72 int entry; 73 74 /* 75 * Get information about the file. 76 * 77 * XXX 78 * We don't currently handle pipes, although we should. 79 */ 80 if (fstat(fd, &sb)) 81 return (NULL); 82 if (!S_ISREG(sb.st_mode)) { 83 errno = ESPIPE; 84 return (NULL); 85 } 86 87 /* Allocate and initialize the MPOOL cookie. */ 88 if ((mp = (MPOOL *)calloc(1, sizeof(MPOOL))) == NULL) 89 return (NULL); 90 CIRCLEQ_INIT(&mp->lqh); 91 for (entry = 0; entry < HASHSIZE; ++entry) 92 CIRCLEQ_INIT(&mp->hqh[entry]); 93 mp->maxcache = maxcache; 94 mp->npages = sb.st_size / pagesize; 95 mp->pagesize = pagesize; 96 mp->fd = fd; 97 return (mp); 98 } 99 100 /* 101 * mpool_filter -- 102 * Initialize input/output filters. 103 */ 104 void 105 mpool_filter(mp, pgin, pgout, pgcookie) 106 MPOOL *mp; 107 void (*pgin)(void *, pgno_t, void *); 108 void (*pgout)(void *, pgno_t, void *); 109 void *pgcookie; 110 { 111 mp->pgin = pgin; 112 mp->pgout = pgout; 113 mp->pgcookie = pgcookie; 114 } 115 116 /* 117 * mpool_new -- 118 * Get a new page of memory. 119 */ 120 void * 121 mpool_new(mp, pgnoaddr, flags) 122 MPOOL *mp; 123 pgno_t *pgnoaddr; 124 u_int flags; 125 { 126 struct _hqh *head; 127 BKT *bp; 128 129 if (mp->npages == MAX_PAGE_NUMBER) { 130 (void)fprintf(stderr, "mpool_new: page allocation overflow.\n"); 131 abort(); 132 } 133 #ifdef STATISTICS 134 ++mp->pagenew; 135 #endif 136 /* 137 * Get a BKT from the cache. Assign a new page number, attach 138 * it to the head of the hash chain, the tail of the lru chain, 139 * and return. 140 */ 141 if ((bp = mpool_bkt(mp)) == NULL) 142 return (NULL); 143 if (flags == MPOOL_PAGE_REQUEST) { 144 mp->npages++; 145 bp->pgno = *pgnoaddr; 146 } else 147 bp->pgno = *pgnoaddr = mp->npages++; 148 149 bp->flags = MPOOL_PINNED | MPOOL_INUSE; 150 151 head = &mp->hqh[HASHKEY(bp->pgno)]; 152 CIRCLEQ_INSERT_HEAD(head, bp, hq); 153 CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q); 154 return (bp->page); 155 } 156 157 int 158 mpool_delete(mp, page) 159 MPOOL *mp; 160 void *page; 161 { 162 struct _hqh *head; 163 BKT *bp; 164 165 bp = (BKT *)((char *)page - sizeof(BKT)); 166 167 #ifdef DEBUG 168 if (!(bp->flags & MPOOL_PINNED)) { 169 (void)fprintf(stderr, 170 "mpool_delete: page %d not pinned\n", bp->pgno); 171 abort(); 172 } 173 #endif 174 175 /* Remove from the hash and lru queues. */ 176 head = &mp->hqh[HASHKEY(bp->pgno)]; 177 CIRCLEQ_REMOVE(head, bp, hq); 178 CIRCLEQ_REMOVE(&mp->lqh, bp, q); 179 180 free(bp); 181 return (RET_SUCCESS); 182 } 183 184 /* 185 * mpool_get 186 * Get a page. 187 */ 188 /* ARGSUSED */ 189 void * 190 mpool_get(mp, pgno, flags) 191 MPOOL *mp; 192 pgno_t pgno; 193 u_int flags; /* XXX not used? */ 194 { 195 struct _hqh *head; 196 BKT *bp; 197 off_t off; 198 int nr; 199 200 #ifdef STATISTICS 201 ++mp->pageget; 202 #endif 203 204 /* Check for a page that is cached. */ 205 if ((bp = mpool_look(mp, pgno)) != NULL) { 206 #ifdef DEBUG 207 if (!(flags & MPOOL_IGNOREPIN) && bp->flags & MPOOL_PINNED) { 208 (void)fprintf(stderr, 209 "mpool_get: page %d already pinned\n", bp->pgno); 210 abort(); 211 } 212 #endif 213 /* 214 * Move the page to the head of the hash chain and the tail 215 * of the lru chain. 216 */ 217 head = &mp->hqh[HASHKEY(bp->pgno)]; 218 CIRCLEQ_REMOVE(head, bp, hq); 219 CIRCLEQ_INSERT_HEAD(head, bp, hq); 220 CIRCLEQ_REMOVE(&mp->lqh, bp, q); 221 CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q); 222 223 /* Return a pinned page. */ 224 bp->flags |= MPOOL_PINNED; 225 return (bp->page); 226 } 227 228 /* Get a page from the cache. */ 229 if ((bp = mpool_bkt(mp)) == NULL) 230 return (NULL); 231 232 /* Read in the contents. */ 233 #ifdef STATISTICS 234 ++mp->pageread; 235 #endif 236 off = mp->pagesize * pgno; 237 if ((nr = pread(mp->fd, bp->page, mp->pagesize, off)) != mp->pagesize) { 238 switch (nr) { 239 case -1: 240 /* errno is set for us by pread(). */ 241 return (NULL); 242 case 0: 243 /* 244 * A zero-length read means you need to create a 245 * new page. 246 */ 247 memset(bp->page, 0, mp->pagesize); 248 default: 249 /* A partial read is definitely bad. */ 250 errno = EINVAL; 251 return (NULL); 252 } 253 } 254 255 /* Set the page number, pin the page. */ 256 bp->pgno = pgno; 257 if (!(flags & MPOOL_IGNOREPIN)) 258 bp->flags = MPOOL_PINNED; 259 bp->flags |= MPOOL_INUSE; 260 261 /* 262 * Add the page to the head of the hash chain and the tail 263 * of the lru chain. 264 */ 265 head = &mp->hqh[HASHKEY(bp->pgno)]; 266 CIRCLEQ_INSERT_HEAD(head, bp, hq); 267 CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q); 268 269 /* Run through the user's filter. */ 270 if (mp->pgin != NULL) 271 (mp->pgin)(mp->pgcookie, bp->pgno, bp->page); 272 273 return (bp->page); 274 } 275 276 /* 277 * mpool_put 278 * Return a page. 279 */ 280 /* ARGSUSED */ 281 int 282 mpool_put(mp, page, flags) 283 MPOOL *mp; 284 void *page; 285 u_int flags; 286 { 287 BKT *bp; 288 289 #ifdef STATISTICS 290 ++mp->pageput; 291 #endif 292 bp = (BKT *)((char *)page - sizeof(BKT)); 293 #ifdef DEBUG 294 if (!(bp->flags & MPOOL_PINNED)) { 295 (void)fprintf(stderr, 296 "mpool_put: page %d not pinned\n", bp->pgno); 297 abort(); 298 } 299 #endif 300 bp->flags &= ~MPOOL_PINNED; 301 if (flags & MPOOL_DIRTY) 302 bp->flags |= flags & MPOOL_DIRTY; 303 return (RET_SUCCESS); 304 } 305 306 /* 307 * mpool_close 308 * Close the buffer pool. 309 */ 310 int 311 mpool_close(mp) 312 MPOOL *mp; 313 { 314 BKT *bp; 315 316 /* Free up any space allocated to the lru pages. */ 317 while ((bp = mp->lqh.cqh_first) != (void *)&mp->lqh) { 318 CIRCLEQ_REMOVE(&mp->lqh, mp->lqh.cqh_first, q); 319 free(bp); 320 } 321 322 /* Free the MPOOL cookie. */ 323 free(mp); 324 return (RET_SUCCESS); 325 } 326 327 /* 328 * mpool_sync 329 * Sync the pool to disk. 330 */ 331 int 332 mpool_sync(mp) 333 MPOOL *mp; 334 { 335 BKT *bp; 336 337 /* Walk the lru chain, flushing any dirty pages to disk. */ 338 for (bp = mp->lqh.cqh_first; 339 bp != (void *)&mp->lqh; bp = bp->q.cqe_next) 340 if (bp->flags & MPOOL_DIRTY && 341 mpool_write(mp, bp) == RET_ERROR) 342 return (RET_ERROR); 343 344 /* Sync the file descriptor. */ 345 return (fsync(mp->fd) ? RET_ERROR : RET_SUCCESS); 346 } 347 348 /* 349 * mpool_bkt 350 * Get a page from the cache (or create one). 351 */ 352 static BKT * 353 mpool_bkt(mp) 354 MPOOL *mp; 355 { 356 struct _hqh *head; 357 BKT *bp; 358 359 /* If under the max cached, always create a new page. */ 360 if (mp->curcache < mp->maxcache) 361 goto new; 362 363 /* 364 * If the cache is max'd out, walk the lru list for a buffer we 365 * can flush. If we find one, write it (if necessary) and take it 366 * off any lists. If we don't find anything we grow the cache anyway. 367 * The cache never shrinks. 368 */ 369 for (bp = mp->lqh.cqh_first; 370 bp != (void *)&mp->lqh; bp = bp->q.cqe_next) 371 if (!(bp->flags & MPOOL_PINNED)) { 372 /* Flush if dirty. */ 373 if (bp->flags & MPOOL_DIRTY && 374 mpool_write(mp, bp) == RET_ERROR) 375 return (NULL); 376 #ifdef STATISTICS 377 ++mp->pageflush; 378 #endif 379 /* Remove from the hash and lru queues. */ 380 head = &mp->hqh[HASHKEY(bp->pgno)]; 381 CIRCLEQ_REMOVE(head, bp, hq); 382 CIRCLEQ_REMOVE(&mp->lqh, bp, q); 383 #ifdef DEBUG 384 { void *spage; 385 spage = bp->page; 386 memset(bp, 0xff, sizeof(BKT) + mp->pagesize); 387 bp->page = spage; 388 } 389 #endif 390 bp->flags = 0; 391 return (bp); 392 } 393 394 new: if ((bp = (BKT *)malloc(sizeof(BKT) + mp->pagesize)) == NULL) 395 return (NULL); 396 #ifdef STATISTICS 397 ++mp->pagealloc; 398 #endif 399 memset(bp, 0xff, sizeof(BKT) + mp->pagesize); 400 bp->page = (char *)bp + sizeof(BKT); 401 bp->flags = 0; 402 ++mp->curcache; 403 return (bp); 404 } 405 406 /* 407 * mpool_write 408 * Write a page to disk. 409 */ 410 static int 411 mpool_write(mp, bp) 412 MPOOL *mp; 413 BKT *bp; 414 { 415 off_t off; 416 417 #ifdef STATISTICS 418 ++mp->pagewrite; 419 #endif 420 421 /* Run through the user's filter. */ 422 if (mp->pgout) 423 (mp->pgout)(mp->pgcookie, bp->pgno, bp->page); 424 425 off = mp->pagesize * bp->pgno; 426 if (pwrite(mp->fd, bp->page, mp->pagesize, off) != mp->pagesize) 427 return (RET_ERROR); 428 429 /* 430 * Re-run through the input filter since this page may soon be 431 * accessed via the cache, and whatever the user's output filter 432 * did may screw things up if we don't let the input filter 433 * restore the in-core copy. 434 */ 435 if (mp->pgin) 436 (mp->pgin)(mp->pgcookie, bp->pgno, bp->page); 437 438 bp->flags &= ~MPOOL_DIRTY; 439 return (RET_SUCCESS); 440 } 441 442 /* 443 * mpool_look 444 * Lookup a page in the cache. 445 */ 446 static BKT * 447 mpool_look(mp, pgno) 448 MPOOL *mp; 449 pgno_t pgno; 450 { 451 struct _hqh *head; 452 BKT *bp; 453 454 head = &mp->hqh[HASHKEY(pgno)]; 455 for (bp = head->cqh_first; bp != (void *)head; bp = bp->hq.cqe_next) 456 if ((bp->pgno == pgno) && 457 ((bp->flags & MPOOL_INUSE) == MPOOL_INUSE)) { 458 #ifdef STATISTICS 459 ++mp->cachehit; 460 #endif 461 return (bp); 462 } 463 #ifdef STATISTICS 464 ++mp->cachemiss; 465 #endif 466 return (NULL); 467 } 468 469 #ifdef STATISTICS 470 /* 471 * mpool_stat 472 * Print out cache statistics. 473 */ 474 void 475 mpool_stat(mp) 476 MPOOL *mp; 477 { 478 BKT *bp; 479 int cnt; 480 char *sep; 481 482 (void)fprintf(stderr, "%lu pages in the file\n", mp->npages); 483 (void)fprintf(stderr, 484 "page size %lu, cacheing %lu pages of %lu page max cache\n", 485 mp->pagesize, mp->curcache, mp->maxcache); 486 (void)fprintf(stderr, "%lu page puts, %lu page gets, %lu page new\n", 487 mp->pageput, mp->pageget, mp->pagenew); 488 (void)fprintf(stderr, "%lu page allocs, %lu page flushes\n", 489 mp->pagealloc, mp->pageflush); 490 if (mp->cachehit + mp->cachemiss) 491 (void)fprintf(stderr, 492 "%.0f%% cache hit rate (%lu hits, %lu misses)\n", 493 ((double)mp->cachehit / (mp->cachehit + mp->cachemiss)) 494 * 100, mp->cachehit, mp->cachemiss); 495 (void)fprintf(stderr, "%lu page reads, %lu page writes\n", 496 mp->pageread, mp->pagewrite); 497 498 sep = ""; 499 cnt = 0; 500 for (bp = mp->lqh.cqh_first; 501 bp != (void *)&mp->lqh; bp = bp->q.cqe_next) { 502 (void)fprintf(stderr, "%s%d", sep, bp->pgno); 503 if (bp->flags & MPOOL_DIRTY) 504 (void)fprintf(stderr, "d"); 505 if (bp->flags & MPOOL_PINNED) 506 (void)fprintf(stderr, "P"); 507 if (++cnt == 10) { 508 sep = "\n"; 509 cnt = 0; 510 } else 511 sep = ", "; 512 513 } 514 (void)fprintf(stderr, "\n"); 515 } 516 #endif 517