1 /* $OpenBSD: mpool.c,v 1.20 2015/01/16 16:48:51 deraadt Exp $ */ 2 3 /*- 4 * Copyright (c) 1990, 1993, 1994 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 #include <sys/queue.h> 33 #include <sys/stat.h> 34 35 #include <errno.h> 36 #include <stdio.h> 37 #include <stdlib.h> 38 #include <string.h> 39 #include <unistd.h> 40 41 #include <db.h> 42 43 #define __MPOOLINTERFACE_PRIVATE 44 #include <mpool.h> 45 46 static BKT *mpool_bkt(MPOOL *); 47 static BKT *mpool_look(MPOOL *, pgno_t); 48 static int mpool_write(MPOOL *, BKT *); 49 50 /* 51 * mpool_open -- 52 * Initialize a memory pool. 53 */ 54 /* ARGSUSED */ 55 MPOOL * 56 mpool_open(void *key, int fd, pgno_t pagesize, pgno_t maxcache) 57 { 58 struct stat sb; 59 MPOOL *mp; 60 int entry; 61 62 /* 63 * Get information about the file. 64 * 65 * XXX 66 * We don't currently handle pipes, although we should. 67 */ 68 if (fstat(fd, &sb)) 69 return (NULL); 70 if (!S_ISREG(sb.st_mode)) { 71 errno = ESPIPE; 72 return (NULL); 73 } 74 75 /* Allocate and initialize the MPOOL cookie. */ 76 if ((mp = (MPOOL *)calloc(1, sizeof(MPOOL))) == NULL) 77 return (NULL); 78 TAILQ_INIT(&mp->lqh); 79 for (entry = 0; entry < HASHSIZE; ++entry) 80 TAILQ_INIT(&mp->hqh[entry]); 81 mp->maxcache = maxcache; 82 mp->npages = sb.st_size / pagesize; 83 mp->pagesize = pagesize; 84 mp->fd = fd; 85 return (mp); 86 } 87 88 /* 89 * mpool_filter -- 90 * Initialize input/output filters. 91 */ 92 void 93 mpool_filter(MPOOL *mp, void (*pgin) (void *, pgno_t, void *), 94 void (*pgout) (void *, pgno_t, void *), void *pgcookie) 95 { 96 mp->pgin = pgin; 97 mp->pgout = pgout; 98 mp->pgcookie = pgcookie; 99 } 100 101 /* 102 * mpool_new -- 103 * Get a new page of memory. 104 */ 105 void * 106 mpool_new(MPOOL *mp, pgno_t *pgnoaddr, u_int flags) 107 { 108 struct _hqh *head; 109 BKT *bp; 110 111 if (mp->npages == MAX_PAGE_NUMBER) { 112 (void)fprintf(stderr, "mpool_new: page allocation overflow.\n"); 113 abort(); 114 } 115 #ifdef STATISTICS 116 ++mp->pagenew; 117 #endif 118 /* 119 * Get a BKT from the cache. Assign a new page number, attach 120 * it to the head of the hash chain, the tail of the lru chain, 121 * and return. 122 */ 123 if ((bp = mpool_bkt(mp)) == NULL) 124 return (NULL); 125 if (flags == MPOOL_PAGE_REQUEST) { 126 mp->npages++; 127 bp->pgno = *pgnoaddr; 128 } else 129 bp->pgno = *pgnoaddr = mp->npages++; 130 131 bp->flags = MPOOL_PINNED | MPOOL_INUSE; 132 133 head = &mp->hqh[HASHKEY(bp->pgno)]; 134 TAILQ_INSERT_HEAD(head, bp, hq); 135 TAILQ_INSERT_TAIL(&mp->lqh, bp, q); 136 return (bp->page); 137 } 138 139 int 140 mpool_delete(MPOOL *mp, void *page) 141 { 142 struct _hqh *head; 143 BKT *bp; 144 145 bp = (BKT *)((char *)page - sizeof(BKT)); 146 147 #ifdef DEBUG 148 if (!(bp->flags & MPOOL_PINNED)) { 149 (void)fprintf(stderr, 150 "mpool_delete: page %d not pinned\n", bp->pgno); 151 abort(); 152 } 153 #endif 154 155 /* Remove from the hash and lru queues. */ 156 head = &mp->hqh[HASHKEY(bp->pgno)]; 157 TAILQ_REMOVE(head, bp, hq); 158 TAILQ_REMOVE(&mp->lqh, bp, q); 159 160 free(bp); 161 mp->curcache--; 162 return (RET_SUCCESS); 163 } 164 165 /* 166 * mpool_get 167 * Get a page. 168 */ 169 /* ARGSUSED */ 170 void * 171 mpool_get(MPOOL *mp, pgno_t pgno, 172 u_int flags) /* XXX not used? */ 173 { 174 struct _hqh *head; 175 BKT *bp; 176 off_t off; 177 int nr; 178 179 #ifdef STATISTICS 180 ++mp->pageget; 181 #endif 182 183 /* Check for a page that is cached. */ 184 if ((bp = mpool_look(mp, pgno)) != NULL) { 185 #ifdef DEBUG 186 if (!(flags & MPOOL_IGNOREPIN) && bp->flags & MPOOL_PINNED) { 187 (void)fprintf(stderr, 188 "mpool_get: page %d already pinned\n", bp->pgno); 189 abort(); 190 } 191 #endif 192 /* 193 * Move the page to the head of the hash chain and the tail 194 * of the lru chain. 195 */ 196 head = &mp->hqh[HASHKEY(bp->pgno)]; 197 TAILQ_REMOVE(head, bp, hq); 198 TAILQ_INSERT_HEAD(head, bp, hq); 199 TAILQ_REMOVE(&mp->lqh, bp, q); 200 TAILQ_INSERT_TAIL(&mp->lqh, bp, q); 201 202 /* Return a pinned page. */ 203 bp->flags |= MPOOL_PINNED; 204 return (bp->page); 205 } 206 207 /* Get a page from the cache. */ 208 if ((bp = mpool_bkt(mp)) == NULL) 209 return (NULL); 210 211 /* Read in the contents. */ 212 off = mp->pagesize * pgno; 213 if ((nr = pread(mp->fd, bp->page, mp->pagesize, off)) != mp->pagesize) { 214 switch (nr) { 215 case -1: 216 /* errno is set for us by pread(). */ 217 free(bp); 218 mp->curcache--; 219 return (NULL); 220 case 0: 221 /* 222 * A zero-length read means you need to create a 223 * new page. 224 */ 225 memset(bp->page, 0, mp->pagesize); 226 break; 227 default: 228 /* A partial read is definitely bad. */ 229 free(bp); 230 mp->curcache--; 231 errno = EINVAL; 232 return (NULL); 233 } 234 } 235 #ifdef STATISTICS 236 ++mp->pageread; 237 #endif 238 239 /* Set the page number, pin the page. */ 240 bp->pgno = pgno; 241 if (!(flags & MPOOL_IGNOREPIN)) 242 bp->flags = MPOOL_PINNED; 243 bp->flags |= MPOOL_INUSE; 244 245 /* 246 * Add the page to the head of the hash chain and the tail 247 * of the lru chain. 248 */ 249 head = &mp->hqh[HASHKEY(bp->pgno)]; 250 TAILQ_INSERT_HEAD(head, bp, hq); 251 TAILQ_INSERT_TAIL(&mp->lqh, bp, q); 252 253 /* Run through the user's filter. */ 254 if (mp->pgin != NULL) 255 (mp->pgin)(mp->pgcookie, bp->pgno, bp->page); 256 257 return (bp->page); 258 } 259 260 /* 261 * mpool_put 262 * Return a page. 263 */ 264 /* ARGSUSED */ 265 int 266 mpool_put(MPOOL *mp, void *page, u_int flags) 267 { 268 BKT *bp; 269 270 #ifdef STATISTICS 271 ++mp->pageput; 272 #endif 273 bp = (BKT *)((char *)page - sizeof(BKT)); 274 #ifdef DEBUG 275 if (!(bp->flags & MPOOL_PINNED)) { 276 (void)fprintf(stderr, 277 "mpool_put: page %d not pinned\n", bp->pgno); 278 abort(); 279 } 280 #endif 281 bp->flags &= ~MPOOL_PINNED; 282 if (flags & MPOOL_DIRTY) 283 bp->flags |= flags & MPOOL_DIRTY; 284 return (RET_SUCCESS); 285 } 286 287 /* 288 * mpool_close 289 * Close the buffer pool. 290 */ 291 int 292 mpool_close(MPOOL *mp) 293 { 294 BKT *bp; 295 296 /* Free up any space allocated to the lru pages. */ 297 while ((bp = TAILQ_FIRST(&mp->lqh))) { 298 TAILQ_REMOVE(&mp->lqh, bp, q); 299 free(bp); 300 } 301 302 /* Free the MPOOL cookie. */ 303 free(mp); 304 return (RET_SUCCESS); 305 } 306 307 /* 308 * mpool_sync 309 * Sync the pool to disk. 310 */ 311 int 312 mpool_sync(MPOOL *mp) 313 { 314 BKT *bp; 315 316 /* Walk the lru chain, flushing any dirty pages to disk. */ 317 TAILQ_FOREACH(bp, &mp->lqh, q) 318 if (bp->flags & MPOOL_DIRTY && 319 mpool_write(mp, bp) == RET_ERROR) 320 return (RET_ERROR); 321 322 /* Sync the file descriptor. */ 323 return (fsync(mp->fd) ? RET_ERROR : RET_SUCCESS); 324 } 325 326 /* 327 * mpool_bkt 328 * Get a page from the cache (or create one). 329 */ 330 static BKT * 331 mpool_bkt(MPOOL *mp) 332 { 333 struct _hqh *head; 334 BKT *bp; 335 336 /* If under the max cached, always create a new page. */ 337 if (mp->curcache < mp->maxcache) 338 goto new; 339 340 /* 341 * If the cache is max'd out, walk the lru list for a buffer we 342 * can flush. If we find one, write it (if necessary) and take it 343 * off any lists. If we don't find anything we grow the cache anyway. 344 * The cache never shrinks. 345 */ 346 TAILQ_FOREACH(bp, &mp->lqh, q) 347 if (!(bp->flags & MPOOL_PINNED)) { 348 /* Flush if dirty. */ 349 if (bp->flags & MPOOL_DIRTY && 350 mpool_write(mp, bp) == RET_ERROR) 351 return (NULL); 352 #ifdef STATISTICS 353 ++mp->pageflush; 354 #endif 355 /* Remove from the hash and lru queues. */ 356 head = &mp->hqh[HASHKEY(bp->pgno)]; 357 TAILQ_REMOVE(head, bp, hq); 358 TAILQ_REMOVE(&mp->lqh, bp, q); 359 #ifdef DEBUG 360 { void *spage; 361 spage = bp->page; 362 memset(bp, 0xff, sizeof(BKT) + mp->pagesize); 363 bp->page = spage; 364 } 365 #endif 366 bp->flags = 0; 367 return (bp); 368 } 369 370 new: if ((bp = (BKT *)malloc(sizeof(BKT) + mp->pagesize)) == NULL) 371 return (NULL); 372 #ifdef STATISTICS 373 ++mp->pagealloc; 374 #endif 375 memset(bp, 0xff, sizeof(BKT) + mp->pagesize); 376 bp->page = (char *)bp + sizeof(BKT); 377 bp->flags = 0; 378 ++mp->curcache; 379 return (bp); 380 } 381 382 /* 383 * mpool_write 384 * Write a page to disk. 385 */ 386 static int 387 mpool_write(MPOOL *mp, BKT *bp) 388 { 389 off_t off; 390 391 #ifdef STATISTICS 392 ++mp->pagewrite; 393 #endif 394 395 /* Run through the user's filter. */ 396 if (mp->pgout) 397 (mp->pgout)(mp->pgcookie, bp->pgno, bp->page); 398 399 off = mp->pagesize * bp->pgno; 400 if (pwrite(mp->fd, bp->page, mp->pagesize, off) != mp->pagesize) 401 return (RET_ERROR); 402 403 /* 404 * Re-run through the input filter since this page may soon be 405 * accessed via the cache, and whatever the user's output filter 406 * did may screw things up if we don't let the input filter 407 * restore the in-core copy. 408 */ 409 if (mp->pgin) 410 (mp->pgin)(mp->pgcookie, bp->pgno, bp->page); 411 412 bp->flags &= ~MPOOL_DIRTY; 413 return (RET_SUCCESS); 414 } 415 416 /* 417 * mpool_look 418 * Lookup a page in the cache. 419 */ 420 static BKT * 421 mpool_look(MPOOL *mp, pgno_t pgno) 422 { 423 struct _hqh *head; 424 BKT *bp; 425 426 head = &mp->hqh[HASHKEY(pgno)]; 427 TAILQ_FOREACH(bp, head, hq) 428 if ((bp->pgno == pgno) && 429 ((bp->flags & MPOOL_INUSE) == MPOOL_INUSE)) { 430 #ifdef STATISTICS 431 ++mp->cachehit; 432 #endif 433 return (bp); 434 } 435 #ifdef STATISTICS 436 ++mp->cachemiss; 437 #endif 438 return (NULL); 439 } 440 441 #ifdef STATISTICS 442 /* 443 * mpool_stat 444 * Print out cache statistics. 445 */ 446 void 447 mpool_stat(MPOOL *mp) 448 { 449 BKT *bp; 450 int cnt; 451 char *sep; 452 453 (void)fprintf(stderr, "%lu pages in the file\n", mp->npages); 454 (void)fprintf(stderr, 455 "page size %lu, cacheing %lu pages of %lu page max cache\n", 456 mp->pagesize, mp->curcache, mp->maxcache); 457 (void)fprintf(stderr, "%lu page puts, %lu page gets, %lu page new\n", 458 mp->pageput, mp->pageget, mp->pagenew); 459 (void)fprintf(stderr, "%lu page allocs, %lu page flushes\n", 460 mp->pagealloc, mp->pageflush); 461 if (mp->cachehit + mp->cachemiss) 462 (void)fprintf(stderr, 463 "%.0f%% cache hit rate (%lu hits, %lu misses)\n", 464 ((double)mp->cachehit / (mp->cachehit + mp->cachemiss)) 465 * 100, mp->cachehit, mp->cachemiss); 466 (void)fprintf(stderr, "%lu page reads, %lu page writes\n", 467 mp->pageread, mp->pagewrite); 468 469 sep = ""; 470 cnt = 0; 471 TAILQ_FOREACH(bp, &mp->lqh, q) { 472 (void)fprintf(stderr, "%s%d", sep, bp->pgno); 473 if (bp->flags & MPOOL_DIRTY) 474 (void)fprintf(stderr, "d"); 475 if (bp->flags & MPOOL_PINNED) 476 (void)fprintf(stderr, "P"); 477 if (++cnt == 10) { 478 sep = "\n"; 479 cnt = 0; 480 } else 481 sep = ", "; 482 483 } 484 (void)fprintf(stderr, "\n"); 485 } 486 #endif 487