1 /*- 2 * Copyright (c) 1990 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * %sccs.include.redist.c% 6 */ 7 8 #if defined(LIBC_SCCS) && !defined(lint) 9 static char sccsid[] = "@(#)mpool.c 5.4 (Berkeley) 02/11/93"; 10 #endif /* LIBC_SCCS and not lint */ 11 12 #include <sys/param.h> 13 #include <sys/stat.h> 14 15 #include <errno.h> 16 #include <stdio.h> 17 #include <stdlib.h> 18 #include <string.h> 19 #include <unistd.h> 20 21 #include <db.h> 22 #define __MPOOLINTERFACE_PRIVATE 23 #include "mpool.h" 24 25 static BKT *mpool_bkt __P((MPOOL *)); 26 static BKT *mpool_look __P((MPOOL *, pgno_t)); 27 static int mpool_write __P((MPOOL *, BKT *)); 28 #ifdef DEBUG 29 static void err __P((const char *fmt, ...)); 30 #endif 31 32 /* 33 * MPOOL_OPEN -- initialize a memory pool. 34 * 35 * Parameters: 36 * key: Shared buffer key. 37 * fd: File descriptor. 38 * pagesize: File page size. 39 * maxcache: Max number of cached pages. 40 * 41 * Returns: 42 * MPOOL pointer, NULL on error. 43 */ 44 MPOOL * 45 mpool_open(key, fd, pagesize, maxcache) 46 DBT *key; 47 int fd; 48 pgno_t pagesize, maxcache; 49 { 50 struct stat sb; 51 MPOOL *mp; 52 int entry; 53 54 if (fstat(fd, &sb)) 55 return (NULL); 56 /* XXX 57 * We should only set st_size to 0 for pipes -- 4.4BSD has the fix so 58 * that stat(2) returns true for ISSOCK on pipes. Until then, this is 59 * fairly close. 60 */ 61 if (!S_ISREG(sb.st_mode)) { 62 errno = ESPIPE; 63 return (NULL); 64 } 65 66 if ((mp = malloc(sizeof(MPOOL))) == NULL) 67 return (NULL); 68 mp->free.cnext = mp->free.cprev = (BKT *)&mp->free; 69 mp->lru.cnext = mp->lru.cprev = (BKT *)&mp->lru; 70 for (entry = 0; entry < HASHSIZE; ++entry) 71 mp->hashtable[entry].hnext = mp->hashtable[entry].hprev = 72 mp->hashtable[entry].cnext = mp->hashtable[entry].cprev = 73 (BKT *)&mp->hashtable[entry]; 74 mp->curcache = 0; 75 mp->maxcache = maxcache; 76 mp->pagesize = pagesize; 77 mp->npages = sb.st_size / pagesize; 78 mp->fd = fd; 79 80 #ifdef STATISTICS 81 mp->cachehit = mp->cachemiss = mp->pagealloc = mp->pageflush = 82 mp->pageget = mp->pagenew = mp->pageput = mp->pageread = 83 mp->pagewrite = 0; 84 #endif 85 return (mp); 86 } 87 88 /* 89 * MPOOL_FILTER -- initialize input/output filters. 90 * 91 * Parameters: 92 * pgin: Page in conversion routine. 93 * pgout: Page out conversion routine. 94 * pgcookie: Cookie for page in/out routines. 95 */ 96 void 97 mpool_filter(mp, pgin, pgout, pgcookie) 98 MPOOL *mp; 99 void (*pgin) __P((void *, pgno_t, void *)); 100 void (*pgout) __P((void *, pgno_t, void *)); 101 void *pgcookie; 102 { 103 mp->pgin = pgin; 104 mp->pgout = pgout; 105 mp->pgcookie = pgcookie; 106 } 107 108 /* 109 * MPOOL_NEW -- get a new page 110 * 111 * Parameters: 112 * mp: mpool cookie 113 * pgnoadddr: place to store new page number 114 * Returns: 115 * RET_ERROR, RET_SUCCESS 116 */ 117 void * 118 mpool_new(mp, pgnoaddr) 119 MPOOL *mp; 120 pgno_t *pgnoaddr; 121 { 122 BKT *b; 123 BKTHDR *hp; 124 125 #ifdef STATISTICS 126 ++mp->pagenew; 127 #endif 128 /* 129 * Get a BKT from the cache. Assign a new page number, attach it to 130 * the hash and lru chains and return. 131 */ 132 if ((b = mpool_bkt(mp)) == NULL) 133 return (NULL); 134 *pgnoaddr = b->pgno = mp->npages++; 135 b->flags = MPOOL_PINNED; 136 inshash(b, b->pgno); 137 inschain(b, &mp->lru); 138 return (b->page); 139 } 140 141 /* 142 * MPOOL_GET -- get a page from the pool 143 * 144 * Parameters: 145 * mp: mpool cookie 146 * pgno: page number 147 * flags: not used 148 * 149 * Returns: 150 * RET_ERROR, RET_SUCCESS 151 */ 152 void * 153 mpool_get(mp, pgno, flags) 154 MPOOL *mp; 155 pgno_t pgno; 156 u_int flags; /* XXX not used? */ 157 { 158 BKT *b; 159 BKTHDR *hp; 160 off_t off; 161 int nr; 162 163 /* 164 * If asking for a specific page that is already in the cache, find 165 * it and return it. 166 */ 167 if (b = mpool_look(mp, pgno)) { 168 #ifdef STATISTICS 169 ++mp->pageget; 170 #endif 171 #ifdef DEBUG 172 if (b->flags & MPOOL_PINNED) 173 err("mpool_get: page %d already pinned", b->pgno); 174 #endif 175 rmchain(b); 176 inschain(b, &mp->lru); 177 b->flags |= MPOOL_PINNED; 178 return (b->page); 179 } 180 181 /* Not allowed to retrieve a non-existent page. */ 182 if (pgno >= mp->npages) { 183 errno = EINVAL; 184 return (NULL); 185 } 186 187 /* Get a page from the cache. */ 188 if ((b = mpool_bkt(mp)) == NULL) 189 return (NULL); 190 b->pgno = pgno; 191 b->flags = MPOOL_PINNED; 192 193 #ifdef STATISTICS 194 ++mp->pageread; 195 #endif 196 /* Read in the contents. */ 197 off = mp->pagesize * pgno; 198 if (lseek(mp->fd, off, SEEK_SET) != off) 199 return (NULL); 200 if ((nr = read(mp->fd, b->page, mp->pagesize)) != mp->pagesize) { 201 if (nr >= 0) 202 errno = EFTYPE; 203 return (NULL); 204 } 205 if (mp->pgin) 206 (mp->pgin)(mp->pgcookie, b->pgno, b->page); 207 208 inshash(b, b->pgno); 209 inschain(b, &mp->lru); 210 #ifdef STATISTICS 211 ++mp->pageget; 212 #endif 213 return (b->page); 214 } 215 216 /* 217 * MPOOL_PUT -- return a page to the pool 218 * 219 * Parameters: 220 * mp: mpool cookie 221 * page: page pointer 222 * pgno: page number 223 * 224 * Returns: 225 * RET_ERROR, RET_SUCCESS 226 */ 227 int 228 mpool_put(mp, page, flags) 229 MPOOL *mp; 230 void *page; 231 u_int flags; 232 { 233 BKT *baddr; 234 #ifdef DEBUG 235 BKT *b; 236 #endif 237 238 #ifdef STATISTICS 239 ++mp->pageput; 240 #endif 241 baddr = (BKT *)((char *)page - sizeof(BKT)); 242 #ifdef DEBUG 243 if (!(baddr->flags & MPOOL_PINNED)) 244 err("mpool_put: page %d not pinned", b->pgno); 245 for (b = mp->lru.cnext; b != (BKT *)&mp->lru; b = b->cnext) { 246 if (b == (BKT *)&mp->lru) 247 err("mpool_put: %0x: bad address", baddr); 248 if (b == baddr) 249 break; 250 } 251 #endif 252 baddr->flags &= ~MPOOL_PINNED; 253 baddr->flags |= flags & MPOOL_DIRTY; 254 return (RET_SUCCESS); 255 } 256 257 /* 258 * MPOOL_CLOSE -- close the buffer pool 259 * 260 * Parameters: 261 * mp: mpool cookie 262 * 263 * Returns: 264 * RET_ERROR, RET_SUCCESS 265 */ 266 int 267 mpool_close(mp) 268 MPOOL *mp; 269 { 270 BKT *b, *next; 271 272 /* Free up any space allocated to the lru pages. */ 273 for (b = mp->lru.cprev; b != (BKT *)&mp->lru; b = next) { 274 next = b->cprev; 275 free(b); 276 } 277 free(mp); 278 return (RET_SUCCESS); 279 } 280 281 /* 282 * MPOOL_SYNC -- sync the file to disk. 283 * 284 * Parameters: 285 * mp: mpool cookie 286 * 287 * Returns: 288 * RET_ERROR, RET_SUCCESS 289 */ 290 int 291 mpool_sync(mp) 292 MPOOL *mp; 293 { 294 BKT *b; 295 296 for (b = mp->lru.cprev; b != (BKT *)&mp->lru; b = b->cprev) 297 if (b->flags & MPOOL_DIRTY && mpool_write(mp, b) == RET_ERROR) 298 return (RET_ERROR); 299 return (fsync(mp->fd) ? RET_ERROR : RET_SUCCESS); 300 } 301 302 /* 303 * MPOOL_BKT -- get/create a BKT from the cache 304 * 305 * Parameters: 306 * mp: mpool cookie 307 * 308 * Returns: 309 * NULL on failure and a pointer to the BKT on success 310 */ 311 static BKT * 312 mpool_bkt(mp) 313 MPOOL *mp; 314 { 315 BKT *b; 316 317 if (mp->curcache < mp->maxcache) 318 goto new; 319 320 /* 321 * If the cache is maxxed out, search the lru list for a buffer we 322 * can flush. If we find one, write it if necessary and take it off 323 * any lists. If we don't find anything we grow the cache anyway. 324 * The cache never shrinks. 325 */ 326 for (b = mp->lru.cprev; b != (BKT *)&mp->lru; b = b->cprev) 327 if (!(b->flags & MPOOL_PINNED)) { 328 if (b->flags & MPOOL_DIRTY && 329 mpool_write(mp, b) == RET_ERROR) 330 return (NULL); 331 rmhash(b); 332 rmchain(b); 333 #ifdef STATISTICS 334 ++mp->pageflush; 335 #endif 336 #ifdef DEBUG 337 { 338 void *spage; 339 spage = b->page; 340 memset(b, 0xff, sizeof(BKT) + mp->pagesize); 341 b->page = spage; 342 } 343 #endif 344 return (b); 345 } 346 347 new: if ((b = malloc(sizeof(BKT) + mp->pagesize)) == NULL) 348 return (NULL); 349 #ifdef STATISTICS 350 ++mp->pagealloc; 351 #endif 352 #ifdef DEBUG 353 memset(b, 0xff, sizeof(BKT) + mp->pagesize); 354 #endif 355 b->page = (char *)b + sizeof(BKT); 356 ++mp->curcache; 357 return (b); 358 } 359 360 /* 361 * MPOOL_WRITE -- sync a page to disk 362 * 363 * Parameters: 364 * mp: mpool cookie 365 * 366 * Returns: 367 * RET_ERROR, RET_SUCCESS 368 */ 369 static int 370 mpool_write(mp, b) 371 MPOOL *mp; 372 BKT *b; 373 { 374 off_t off; 375 376 if (mp->pgout) 377 (mp->pgout)(mp->pgcookie, b->pgno, b->page); 378 379 #ifdef STATISTICS 380 ++mp->pagewrite; 381 #endif 382 off = mp->pagesize * b->pgno; 383 if (lseek(mp->fd, off, SEEK_SET) != off) 384 return (RET_ERROR); 385 if (write(mp->fd, b->page, mp->pagesize) != mp->pagesize) 386 return (RET_ERROR); 387 b->flags &= ~MPOOL_DIRTY; 388 return (RET_SUCCESS); 389 } 390 391 /* 392 * MPOOL_LOOK -- lookup a page 393 * 394 * Parameters: 395 * mp: mpool cookie 396 * pgno: page number 397 * 398 * Returns: 399 * NULL on failure and a pointer to the BKT on success 400 */ 401 static BKT * 402 mpool_look(mp, pgno) 403 MPOOL *mp; 404 pgno_t pgno; 405 { 406 register BKT *b; 407 register BKTHDR *tb; 408 409 /* XXX 410 * If find the buffer, put it first on the hash chain so can 411 * find it again quickly. 412 */ 413 tb = &mp->hashtable[HASHKEY(pgno)]; 414 for (b = tb->hnext; b != (BKT *)tb; b = b->hnext) 415 if (b->pgno == pgno) { 416 #ifdef STATISTICS 417 ++mp->cachehit; 418 #endif 419 return (b); 420 } 421 #ifdef STATISTICS 422 ++mp->cachemiss; 423 #endif 424 return (NULL); 425 } 426 427 #ifdef STATISTICS 428 /* 429 * MPOOL_STAT -- cache statistics 430 * 431 * Parameters: 432 * mp: mpool cookie 433 */ 434 void 435 mpool_stat(mp) 436 MPOOL *mp; 437 { 438 BKT *b; 439 int cnt; 440 char *sep; 441 442 (void)fprintf(stderr, "%lu pages in the file\n", mp->npages); 443 (void)fprintf(stderr, 444 "page size %lu, cacheing %lu pages of %lu page max cache\n", 445 mp->pagesize, mp->curcache, mp->maxcache); 446 (void)fprintf(stderr, "%lu page puts, %lu page gets, %lu page new\n", 447 mp->pageput, mp->pageget, mp->pagenew); 448 (void)fprintf(stderr, "%lu page allocs, %lu page flushes\n", 449 mp->pagealloc, mp->pageflush); 450 if (mp->cachehit + mp->cachemiss) 451 (void)fprintf(stderr, 452 "%.0f%% cache hit rate (%lu hits, %lu misses)\n", 453 ((double)mp->cachehit / (mp->cachehit + mp->cachemiss)) 454 * 100, mp->cachehit, mp->cachemiss); 455 (void)fprintf(stderr, "%lu page reads, %lu page writes\n", 456 mp->pageread, mp->pagewrite); 457 458 sep = ""; 459 cnt = 0; 460 for (b = mp->lru.cnext; b != (BKT *)&mp->lru; b = b->cnext) { 461 (void)fprintf(stderr, "%s%d", sep, b->pgno); 462 if (b->flags & MPOOL_DIRTY) 463 (void)fprintf(stderr, "d"); 464 if (b->flags & MPOOL_PINNED) 465 (void)fprintf(stderr, "P"); 466 if (++cnt == 10) { 467 sep = "\n"; 468 cnt = 0; 469 } else 470 sep = ", "; 471 472 } 473 (void)fprintf(stderr, "\n"); 474 } 475 #endif 476 477 #ifdef DEBUG 478 #if __STDC__ 479 #include <stdarg.h> 480 #else 481 #include <varargs.h> 482 #endif 483 484 static void 485 #if __STDC__ 486 err(const char *fmt, ...) 487 #else 488 err(fmt, va_alist) 489 char *fmt; 490 va_dcl 491 #endif 492 { 493 va_list ap; 494 #if __STDC__ 495 va_start(ap, fmt); 496 #else 497 va_start(ap); 498 #endif 499 (void)vfprintf(stderr, fmt, ap); 500 va_end(ap); 501 (void)fprintf(stderr, "\n"); 502 abort(); 503 /* NOTREACHED */ 504 } 505 #endif 506