1 /* $NetBSD: mpool.c,v 1.23 2016/09/24 21:31:25 christos Exp $ */ 2 3 /*- 4 * Copyright (c) 1990, 1993, 1994 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 #if HAVE_NBTOOL_CONFIG_H 33 #include "nbtool_config.h" 34 #endif 35 36 #include <sys/cdefs.h> 37 __RCSID("$NetBSD: mpool.c,v 1.23 2016/09/24 21:31:25 christos Exp $"); 38 39 #include "namespace.h" 40 #include <sys/queue.h> 41 #include <sys/stat.h> 42 43 #include <errno.h> 44 #include <stdio.h> 45 #include <stdlib.h> 46 #include <string.h> 47 #include <unistd.h> 48 49 #include <db.h> 50 51 #define __MPOOLINTERFACE_PRIVATE 52 #include <mpool.h> 53 54 #ifdef __weak_alias 55 __weak_alias(mpool_close,_mpool_close) 56 __weak_alias(mpool_filter,_mpool_filter) 57 __weak_alias(mpool_get,_mpool_get) 58 __weak_alias(mpool_new,_mpool_new) 59 __weak_alias(mpool_newf,_mpool_newf) 60 __weak_alias(mpool_open,_mpool_open) 61 __weak_alias(mpool_put,_mpool_put) 62 __weak_alias(mpool_sync,_mpool_sync) 63 #endif 64 65 static BKT *mpool_bkt(MPOOL *); 66 static BKT *mpool_look(MPOOL *, pgno_t); 67 static int mpool_write(MPOOL *, BKT *); 68 69 /* 70 * mpool_open -- 71 * Initialize a memory pool. 72 */ 73 /*ARGSUSED*/ 74 MPOOL * 75 mpool_open(void *key, int fd, pgno_t pagesize, pgno_t maxcache) 76 { 77 struct stat sb; 78 MPOOL *mp; 79 int entry; 80 81 /* 82 * Get information about the file. 83 * 84 * XXX 85 * We don't currently handle pipes, although we should. 86 */ 87 if (fstat(fd, &sb)) 88 return NULL; 89 if (!S_ISREG(sb.st_mode)) { 90 errno = ESPIPE; 91 return NULL; 92 } 93 94 /* Allocate and initialize the MPOOL cookie. */ 95 if ((mp = calloc(1, sizeof(*mp))) == NULL) 96 return (NULL); 97 TAILQ_INIT(&mp->lqh); 98 for (entry = 0; entry < HASHSIZE; ++entry) 99 TAILQ_INIT(&mp->hqh[entry]); 100 mp->maxcache = maxcache; 101 mp->npages = (pgno_t)(sb.st_size / pagesize); 102 mp->pagesize = pagesize; 103 mp->fd = fd; 104 return mp; 105 } 106 107 /* 108 * mpool_filter -- 109 * Initialize input/output filters. 110 */ 111 void 112 mpool_filter(MPOOL *mp, void (*pgin)(void *, pgno_t, void *), 113 void (*pgout)(void *, pgno_t, void *), void *pgcookie) 114 { 115 mp->pgin = pgin; 116 mp->pgout = pgout; 117 mp->pgcookie = pgcookie; 118 } 119 120 /* 121 * mpool_new -- 122 * Get a new page of memory. 123 */ 124 void * 125 mpool_newf(MPOOL *mp, pgno_t *pgnoaddr, unsigned int flags) 126 { 127 struct _hqh *head; 128 BKT *bp; 129 130 if (mp->npages == MAX_PAGE_NUMBER) { 131 (void)fprintf(stderr, "mpool_new: page allocation overflow.\n"); 132 abort(); 133 } 134 #ifdef STATISTICS 135 ++mp->pagenew; 136 #endif 137 /* 138 * Get a BKT from the cache. Assign a new page number, attach 139 * it to the head of the hash chain, the tail of the lru chain, 140 * and return. 141 */ 142 if ((bp = mpool_bkt(mp)) == NULL) 143 return NULL; 144 145 if (flags == MPOOL_PAGE_REQUEST) { 146 mp->npages++; 147 bp->pgno = *pgnoaddr; 148 } else 149 bp->pgno = *pgnoaddr = mp->npages++; 150 151 bp->flags = MPOOL_PINNED | MPOOL_INUSE; 152 153 head = &mp->hqh[HASHKEY(bp->pgno)]; 154 TAILQ_INSERT_HEAD(head, bp, hq); 155 TAILQ_INSERT_TAIL(&mp->lqh, bp, q); 156 return bp->page; 157 } 158 159 void * 160 mpool_new(MPOOL *mp, pgno_t *pgnoaddr) 161 { 162 return mpool_newf(mp, pgnoaddr, 0); 163 } 164 165 int 166 mpool_delete(MPOOL *mp, void *page) 167 { 168 struct _hqh *head; 169 BKT *bp; 170 171 bp = (void *)((char *)page - sizeof(BKT)); 172 173 #ifdef DEBUG 174 if (!(bp->flags & MPOOL_PINNED)) { 175 (void)fprintf(stderr, 176 "%s: page %d not pinned\n", __func__, bp->pgno); 177 abort(); 178 } 179 #endif 180 181 /* Remove from the hash and lru queues. */ 182 head = &mp->hqh[HASHKEY(bp->pgno)]; 183 TAILQ_REMOVE(head, bp, hq); 184 TAILQ_REMOVE(&mp->lqh, bp, q); 185 186 free(bp); 187 return RET_SUCCESS; 188 } 189 190 /* 191 * mpool_get 192 * Get a page. 193 */ 194 /*ARGSUSED*/ 195 void * 196 mpool_get(MPOOL *mp, pgno_t pgno, unsigned int flags) 197 { 198 struct _hqh *head; 199 BKT *bp; 200 off_t off; 201 ssize_t nr; 202 203 /* Check for attempt to retrieve a non-existent page. */ 204 if (pgno >= mp->npages) { 205 errno = EINVAL; 206 return NULL; 207 } 208 209 #ifdef STATISTICS 210 ++mp->pageget; 211 #endif 212 213 /* Check for a page that is cached. */ 214 if ((bp = mpool_look(mp, pgno)) != NULL) { 215 #ifdef DEBUG 216 if (!(flags & MPOOL_IGNOREPIN) && bp->flags & MPOOL_PINNED) { 217 (void)fprintf(stderr, 218 "mpool_get: page %d already pinned\n", bp->pgno); 219 abort(); 220 } 221 #endif 222 /* 223 * Move the page to the head of the hash chain and the tail 224 * of the lru chain. 225 */ 226 head = &mp->hqh[HASHKEY(bp->pgno)]; 227 TAILQ_REMOVE(head, bp, hq); 228 TAILQ_INSERT_HEAD(head, bp, hq); 229 TAILQ_REMOVE(&mp->lqh, bp, q); 230 TAILQ_INSERT_TAIL(&mp->lqh, bp, q); 231 232 /* Return a pinned page. */ 233 if (!(flags & MPOOL_IGNOREPIN)) 234 bp->flags |= MPOOL_PINNED; 235 return bp->page; 236 } 237 238 /* Get a page from the cache. */ 239 if ((bp = mpool_bkt(mp)) == NULL) 240 return NULL; 241 242 /* Read in the contents. */ 243 #ifdef STATISTICS 244 ++mp->pageread; 245 #endif 246 off = mp->pagesize * pgno; 247 if (off / mp->pagesize != pgno) { 248 /* Run past the end of the file, or at least the part we 249 can address without large-file support? */ 250 errno = E2BIG; 251 return NULL; 252 } 253 254 if ((nr = pread(mp->fd, bp->page, (size_t)mp->pagesize, off)) != (int)mp->pagesize) { 255 if (nr > 0) { 256 errno = EFTYPE; 257 return NULL; 258 } else if (nr == 0) { 259 /* 260 * A zero-length reads, means you need to create a 261 * new page. 262 */ 263 memset(bp->page, 0, mp->pagesize); 264 } else 265 return NULL; 266 } 267 268 /* Set the page number, pin the page. */ 269 bp->pgno = pgno; 270 if (!(flags & MPOOL_IGNOREPIN)) 271 bp->flags = MPOOL_PINNED; 272 bp->flags |= MPOOL_INUSE; 273 274 /* 275 * Add the page to the head of the hash chain and the tail 276 * of the lru chain. 277 */ 278 head = &mp->hqh[HASHKEY(bp->pgno)]; 279 TAILQ_INSERT_HEAD(head, bp, hq); 280 TAILQ_INSERT_TAIL(&mp->lqh, bp, q); 281 282 /* Run through the user's filter. */ 283 if (mp->pgin != NULL) 284 (mp->pgin)(mp->pgcookie, bp->pgno, bp->page); 285 286 return bp->page; 287 } 288 289 /* 290 * mpool_put 291 * Return a page. 292 */ 293 /*ARGSUSED*/ 294 int 295 mpool_put(MPOOL *mp, void *page, u_int flags) 296 { 297 BKT *bp; 298 299 #ifdef STATISTICS 300 ++mp->pageput; 301 #endif 302 bp = (void *)((intptr_t)page - sizeof(BKT)); 303 #ifdef DEBUG 304 if (!(bp->flags & MPOOL_PINNED)) { 305 (void)fprintf(stderr, 306 "mpool_put: page %d not pinned\n", bp->pgno); 307 abort(); 308 } 309 #endif 310 bp->flags &= ~MPOOL_PINNED; 311 if (flags & MPOOL_DIRTY) 312 bp->flags |= flags & MPOOL_DIRTY; 313 return (RET_SUCCESS); 314 } 315 316 /* 317 * mpool_close 318 * Close the buffer pool. 319 */ 320 int 321 mpool_close(MPOOL *mp) 322 { 323 BKT *bp; 324 325 /* Free up any space allocated to the lru pages. */ 326 while (!TAILQ_EMPTY(&mp->lqh)) { 327 bp = TAILQ_FIRST(&mp->lqh); 328 TAILQ_REMOVE(&mp->lqh, bp, q); 329 free(bp); 330 } 331 332 /* Free the MPOOL cookie. */ 333 free(mp); 334 return RET_SUCCESS; 335 } 336 337 /* 338 * mpool_sync 339 * Sync the pool to disk. 340 */ 341 int 342 mpool_sync(MPOOL *mp) 343 { 344 BKT *bp; 345 346 /* Walk the lru chain, flushing any dirty pages to disk. */ 347 TAILQ_FOREACH(bp, &mp->lqh, q) 348 if (bp->flags & MPOOL_DIRTY && 349 mpool_write(mp, bp) == RET_ERROR) 350 return RET_ERROR; 351 352 /* Sync the file descriptor. */ 353 return fsync(mp->fd) ? RET_ERROR : RET_SUCCESS; 354 } 355 356 /* 357 * mpool_bkt 358 * Get a page from the cache (or create one). 359 */ 360 static BKT * 361 mpool_bkt(MPOOL *mp) 362 { 363 struct _hqh *head; 364 BKT *bp; 365 366 /* If under the max cached, always create a new page. */ 367 if (mp->curcache < mp->maxcache) 368 goto new; 369 370 /* 371 * If the cache is max'd out, walk the lru list for a buffer we 372 * can flush. If we find one, write it (if necessary) and take it 373 * off any lists. If we don't find anything we grow the cache anyway. 374 * The cache never shrinks. 375 */ 376 TAILQ_FOREACH(bp, &mp->lqh, q) 377 if (!(bp->flags & MPOOL_PINNED)) { 378 /* Flush if dirty. */ 379 if (bp->flags & MPOOL_DIRTY && 380 mpool_write(mp, bp) == RET_ERROR) 381 return NULL; 382 #ifdef STATISTICS 383 ++mp->pageflush; 384 #endif 385 /* Remove from the hash and lru queues. */ 386 head = &mp->hqh[HASHKEY(bp->pgno)]; 387 TAILQ_REMOVE(head, bp, hq); 388 TAILQ_REMOVE(&mp->lqh, bp, q); 389 #ifdef DEBUG 390 { 391 void *spage = bp->page; 392 (void)memset(bp, 0xff, 393 (size_t)(sizeof(BKT) + mp->pagesize)); 394 bp->page = spage; 395 } 396 #endif 397 return bp; 398 } 399 400 new: if ((bp = calloc(1, (size_t)(sizeof(BKT) + mp->pagesize))) == NULL) 401 return NULL; 402 #ifdef STATISTICS 403 ++mp->pagealloc; 404 #endif 405 #if defined(DEBUG) || defined(PURIFY) 406 (void)memset(bp, 0xff, (size_t)(sizeof(BKT) + mp->pagesize)); 407 #endif 408 bp->page = (void *)((intptr_t)bp + sizeof(BKT)); 409 ++mp->curcache; 410 return bp; 411 } 412 413 /* 414 * mpool_write 415 * Write a page to disk. 416 */ 417 static int 418 mpool_write(MPOOL *mp, BKT *bp) 419 { 420 off_t off; 421 422 #ifdef STATISTICS 423 ++mp->pagewrite; 424 #endif 425 426 /* Run through the user's filter. */ 427 if (mp->pgout) 428 (mp->pgout)(mp->pgcookie, bp->pgno, bp->page); 429 430 off = mp->pagesize * bp->pgno; 431 if (off / mp->pagesize != bp->pgno) { 432 /* Run past the end of the file, or at least the part we 433 can address without large-file support? */ 434 errno = E2BIG; 435 return RET_ERROR; 436 } 437 438 if (pwrite(mp->fd, bp->page, (size_t)mp->pagesize, off) != 439 (ssize_t)mp->pagesize) 440 return RET_ERROR; 441 442 /* 443 * Re-run through the input filter since this page may soon be 444 * accessed via the cache, and whatever the user's output filter 445 * did may screw things up if we don't let the input filter 446 * restore the in-core copy. 447 */ 448 if (mp->pgin) 449 (mp->pgin)(mp->pgcookie, bp->pgno, bp->page); 450 451 bp->flags &= ~MPOOL_DIRTY; 452 return RET_SUCCESS; 453 } 454 455 /* 456 * mpool_look 457 * Lookup a page in the cache. 458 */ 459 static BKT * 460 mpool_look(MPOOL *mp, pgno_t pgno) 461 { 462 struct _hqh *head; 463 BKT *bp; 464 465 head = &mp->hqh[HASHKEY(pgno)]; 466 TAILQ_FOREACH(bp, head, hq) 467 if (bp->pgno == pgno) { 468 #ifdef STATISTICS 469 ++mp->cachehit; 470 #endif 471 return bp; 472 } 473 #ifdef STATISTICS 474 ++mp->cachemiss; 475 #endif 476 return NULL; 477 } 478 479 #ifdef STATISTICS 480 /* 481 * mpool_stat 482 * Print out cache statistics. 483 */ 484 void 485 mpool_stat(mp) 486 MPOOL *mp; 487 { 488 BKT *bp; 489 int cnt; 490 const char *sep; 491 492 (void)fprintf(stderr, "%lu pages in the file\n", (u_long)mp->npages); 493 (void)fprintf(stderr, 494 "page size %lu, cacheing %lu pages of %lu page max cache\n", 495 (u_long)mp->pagesize, (u_long)mp->curcache, (u_long)mp->maxcache); 496 (void)fprintf(stderr, "%lu page puts, %lu page gets, %lu page new\n", 497 mp->pageput, mp->pageget, mp->pagenew); 498 (void)fprintf(stderr, "%lu page allocs, %lu page flushes\n", 499 mp->pagealloc, mp->pageflush); 500 if (mp->cachehit + mp->cachemiss) 501 (void)fprintf(stderr, 502 "%.0f%% cache hit rate (%lu hits, %lu misses)\n", 503 ((double)mp->cachehit / (mp->cachehit + mp->cachemiss)) 504 * 100, mp->cachehit, mp->cachemiss); 505 (void)fprintf(stderr, "%lu page reads, %lu page writes\n", 506 mp->pageread, mp->pagewrite); 507 508 sep = ""; 509 cnt = 0; 510 TAILQ_FOREACH(bp, &mp->lqh, q) { 511 (void)fprintf(stderr, "%s%d", sep, bp->pgno); 512 if (bp->flags & MPOOL_DIRTY) 513 (void)fprintf(stderr, "d"); 514 if (bp->flags & MPOOL_PINNED) 515 (void)fprintf(stderr, "P"); 516 if (++cnt == 10) { 517 sep = "\n"; 518 cnt = 0; 519 } else 520 sep = ", "; 521 522 } 523 (void)fprintf(stderr, "\n"); 524 } 525 #endif 526