xref: /csrg-svn/lib/libc/db/btree/lrucache.c (revision 45875)
1 /*-
2  * Copyright (c) 1990 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Mike Olson.
7  *
8  * %sccs.include.redist.c%
9  */
10 
11 #if defined(LIBC_SCCS) && !defined(lint)
12 static char sccsid[] = "@(#)lrucache.c	5.1 (Berkeley) 01/07/91";
13 #endif /* LIBC_SCCS and not lint */
14 
15 /*
16  *  lrucache.c -- LRU cache for disk-based btree pages.
17  *
18  *	This file implements an LRU cache in user space for disk-based
19  *	btrees.
20  */
21 #include <sys/param.h>
22 #include <sys/file.h>
23 #include <sys/signal.h>
24 
25 /*
26  *  LRU list entries.  The head of the list is the most-recently requested
27  *  block; the tail is the least-recently requested one.
28  */
29 
/*
 *  One entry in the doubly-linked LRU list; there is exactly one of these
 *  per cached buffer.  Entries are moved to the list head on each access.
 */
typedef struct LRU_ENT {
	char	*l_buffer;		/* buffer we return to user */
	int	l_pgno;			/* logical page number */
	int	l_flags;		/* F_FREE and F_DIRTY bits */
	struct LRU_ENT	*l_prev;	/* predecessor in LRU list */
	struct LRU_ENT	*l_next;	/* successor in LRU list */
} LRU_ENT;
37 
38 /*
39  *  Cache entries.  We use a hash table to avoid a linear walk of the LRU
40  *  list when we need to look up blocks by number.  The hash table is
41  *  chained.
42  */
43 
typedef struct CACHE_ENT {
	int			c_pgno;		/* logical page number (hash key) */
	LRU_ENT			*c_lruent;	/* LRU list entry for this page */
	struct CACHE_ENT	*c_chain;	/* next entry on this hash chain */
} CACHE_ENT;
49 
50 /*
51  *  The LRU cache structure.  The cache size (lru_csize) is the largest size
52  *  the user wants us to grow to; current size (lru_cursz) is always less than
53  *  or equal to lru_csize.  Note that we will grow the cache (lru_csize) if
54  *  it's the only way that we can satisfy a user's block request.
55  */
56 
typedef struct LRUCACHE {
	int		lru_fd;			/* file descriptor being cached */
	int		lru_csize;		/* max cache size (may be grown) */
	int		lru_psize;		/* logical page size, in bytes */
	int		lru_cursz;		/* current number of buffers */
	char		*lru_opaque;		/* passed to inproc, outproc */
	int		(*lru_inproc)();	/* called after a page is read */
	int		(*lru_outproc)();	/* called before a page is written */
	LRU_ENT		*lru_head;		/* most-recently-used entry */
	LRU_ENT		*lru_tail;		/* least-recently-used entry */
	CACHE_ENT	**lru_cache;		/* chained hash table, lru_csize buckets */
} LRUCACHE;
69 
/* opaque handle returned to users of this cache; really an LRUCACHE * */
typedef	char	*LRU;
72 
/* bits for l_flags in LRU_ENT structure */
#define	F_DIRTY		(1 << 0)	/* buffer modified since last flush */
#define F_FREE		(1 << 1)	/* buffer released; eligible for reuse */

/*
 *  Map a page number to a hash bucket.  Both arguments are parenthesized
 *  so the macro expands correctly for expression arguments.
 */
#define HASH(l, pgno)	((pgno) % (l)->lru_csize)
78 
79 /* all of these are defined in this file */
80 static CACHE_ENT	*lruhashget();
81 static CACHE_ENT	*lruhashput();
82 static int 		lruhashdel();
83 static void		lruhead();
84 static int 		lrugrow();
85 extern LRU		lruinit();
86 extern int		lruwrite();
87 extern int		lrusync();
88 extern char		*lruget();
89 extern char		*lrugetnew();
90 static char		*lrugetpg();
91 extern int		lrurelease();
92 extern void		lrufree();
93 
94 /*
95  *  LRUINIT -- Initialize a new LRU cache.
96  *
97  *	There's a separate LRU cache for every open file descriptor on which
98  *	the user wants caching.  The desired cache size and logical page
99  *	size are passed in.  We try to avoid growing the cache beyond the
100  *	limit specified by the user, but if we cannot satisfy a block request
101  *	without growing the cache, we do so.
102  *
103  *	Note that the page size passed in is the logical page size for
104  *	use with this file descriptor, and doesn't necessarily have anything
105  *	to do with the underlying hardware page size.
106  *
107  *	Parameters:
108  *		fd -- file descriptor for this cache
109  *		cachesz -- number of buffers in cache (suggested)
110  *		pagesz -- logical page size inside this file
111  *		inproc -- routine to call when a buffer is read
112  *		outproc -- routine to call when a buffer is written
113  *
114  *	Returns:
115  *		Opaque pointer to the LRU cache on success, NULL on failure.
116  *
117  *	Side Effects:
118  *		Allocates memory for the hash table and LRU cache.  Buffers
119  *		are allocated on demand, later.
120  */
121 LRU
122 lruinit(fd, cachesz, pagesz, opaque, inproc, outproc)
123 	int fd;
124 	int cachesz;
125 	int pagesz;
126 	char *opaque;
127 	int (*inproc)();
128 	int (*outproc)();
129 {
130 	register LRUCACHE *l;
131 	int nbytes;
132 
133 	/* allocate the LRU cache struct */
134 	if ((l = (LRUCACHE *) malloc((unsigned) sizeof(LRUCACHE)))
135 	    == (LRUCACHE *) NULL)
136 		return ((LRU) NULL);
137 
138 	/* allocate the hash table */
139 	nbytes = cachesz * sizeof(CACHE_ENT *);
140 	if ((l->lru_cache = (CACHE_ENT **) malloc((unsigned) nbytes))
141 	    == (CACHE_ENT **) NULL) {
142 		(void) free(l);
143 		return ((LRU) NULL);
144 	}
145 
146 	/* init memory */
147 	bzero(l->lru_cache, nbytes);
148 	l->lru_fd = fd;
149 	l->lru_psize = pagesz;
150 	l->lru_csize = cachesz;
151 	l->lru_cursz = 0;
152 	l->lru_opaque = opaque;
153 	l->lru_head = l->lru_tail = (LRU_ENT *) NULL;
154 	l->lru_inproc = inproc;
155 	l->lru_outproc = outproc;
156 
157 	return ((LRU) l);
158 }
159 
160 /*
161  *  LRUGET -- Get a buffer from an LRU cache.
162  *
163  *	If the buffer is not in the cache at present, this routine will
164  *	instantiate it from the file.  This REQUIRES that the desired
165  *	block actually be on disk; we don't do non-blocking reads, so
166  *	if it's not actually out there, this routine won't return for
167  *	a very long time.  In order to instantiate a new buffer, use
168  *	lrugetnew().
169  *
170  *	Parameters:
171  *		lru -- the LRU cache to use.
172  *		pgno -- the logical block number to get.
173  *		nread -- pointer to an int, in which the number of bytes
174  *			 read is returned.
175  *
176  *	Returns:
177  *		(char *) pointer to the buffer for the desired block.  The
178  *		number of bytes actually read is returned in *nread.
179  *
180  *	Warnings:
181  *		The requested buffer is locked down until the user does
182  *		an explicit lrurelease() on it.
183  */
184 
185 char *
186 lruget(lru, pgno, nread)
187 	LRU lru;
188 	int pgno;
189 	int *nread;
190 {
191 	LRUCACHE *l = (LRUCACHE *) lru;
192 	CACHE_ENT *ce;
193 	int hashind;
194 	LRU_ENT *lruent;
195 	char *buffer;
196 	long pos;
197 
198 	/* is it already in the cache? */
199 	if ((ce = lruhashget(l, pgno)) != (CACHE_ENT *) NULL) {
200 
201 		/* yes, move it to the head and return it */
202 		lruent = ce->c_lruent;
203 		lruent->l_flags &= ~F_FREE;
204 		lruhead(l, ce->c_lruent);
205 		*nread = l->lru_psize;
206 		return (ce->c_lruent->l_buffer);
207 	}
208 
209 	/* not there, get a free page */
210 	if ((buffer = lrugetpg(l, pgno, nread, lruget)) == (char *) NULL)
211 		return ((char *) NULL);
212 
213 	/* okay, got a buffer -- fill it */
214 	pos = (long) (l->lru_psize * pgno);
215 	if (lseek(l->lru_fd, pos, L_SET) != pos)
216 		return ((char *) NULL);
217 
218 	*nread = read(l->lru_fd, buffer, l->lru_psize);
219 
220 	if (l->lru_inproc)
221 		(*(l->lru_inproc))(buffer, l->lru_opaque);
222 
223 	return (buffer);
224 }
225 
226 /*
227  *  LRUGETNEW -- Get a page for a new block in an LRU cache.
228  *
229  *	This routine allows users to instantiate pages for a file if they
230  *	don't exist on disk yet.  It's used to make a file bigger.
231  *
232  *	Parameters:
233  *		lru -- the LRU cache to use
234  *		pgno -- the (new) logical page number to instantiate
235  *		nread -- ptr to int to get number of bytes read (this is
236  *			 guaranteed to be zero, since the page isn't on disk)
237  *
238  *	Returns:
239  *		Pointer to a buffer for the associated page, or NULL on
240  *		failure.
241  *
242  *	Warnings:
243  *		The associated buffer is locked down until the user
244  *		explicitly does an lrurelease() on it.
245  */
246 
247 char *
248 lrugetnew(lru, pgno, nread)
249 	LRU lru;
250 	int pgno;
251 	int *nread;
252 {
253 	LRUCACHE *l = (LRUCACHE *) lru;
254 
255 	*nread = 0;
256 	return (lrugetpg(l, pgno, nread, lrugetnew));
257 }
258 
259 /*
260  *  LRUGETPG -- Get a free page from the LRU cache.
261  *
262  *	This routine grows the cache if necessary, finds an unused page if
263  *	it can, and handles flushing dirty buffers to disk.
264  *
265  *	One of the parameters to this routine (f) is the routine that called
266  *	us.  If we have to grow the cache, we call this routine recursively
267  *	in order to fill the buffer.  The reason for this is that we have
268  *	two interfaces that call lrugetpg().  Lruget() fills a page from disk,
269  *	and lrugetnew() just allocates a new (empty) page.
270  *
271  *	Parameters:
272  *		l -- LRU cache to use.
273  *		pgno -- page number for which we want a buffer
274  *		nread -- pointer to an int to get number of bytes read
275  *		f -- who called us
276  *
277  *	Returns:
278  *		(char *) pointer to buffer to use, or NULL on failure.
279  *
280  *	Warnings:
281  *		The buffer returned is locked down until the user does an
282  *		explicit lrurelease() on it.
283  */
284 
285 static char *
286 lrugetpg(l, pgno, nread, f)
287 	LRUCACHE *l;
288 	int pgno;
289 	int *nread;
290 	char *(*f)();
291 {
292 	CACHE_ENT *ce;
293 	LRU_ENT *lruent;
294 	char *buffer;
295 
296 	/* if we're allowed to grow the cache, do so */
297 	if (l->lru_cursz < l->lru_csize) {
298 
299 		/* get a buffer */
300 		if ((buffer = (char *) malloc((unsigned) l->lru_psize))
301 		    == (char *) NULL)
302 			return ((char *) NULL);
303 
304 		/* get and LRU list entry */
305 		if ((lruent = (LRU_ENT *) malloc((unsigned) sizeof(LRU_ENT)))
306 		    == (LRU_ENT *) NULL)
307 			return ((char *) NULL);
308 		lruent->l_buffer = buffer;
309 		lruent->l_pgno = pgno;
310 		lruent->l_flags = NULL;
311 
312 		/* manage spaghetti */
313 		lruent->l_prev = (LRU_ENT *) NULL;
314 		lruent->l_next = l->lru_head;
315 		if (l->lru_head != (LRU_ENT *) NULL)
316 			l->lru_head->l_prev = lruent;
317 		l->lru_head = lruent;
318 		if (l->lru_tail == (LRU_ENT *) NULL)
319 			l->lru_tail = lruent;
320 
321 		/* add it to the hash table */
322 		ce = lruhashput(l, pgno, lruent);
323 		l->lru_cursz++;
324 	} else {
325 		lruent = l->lru_tail;
326 
327 		/* find the oldest unused buffer */
328 		while (lruent != (LRU_ENT *) NULL
329 		       && !(lruent->l_flags & F_FREE))
330 			lruent = lruent->l_prev;
331 
332 		/* if no free buffer was found, we have to grow the cache */
333 		if (lruent == (LRU_ENT *) NULL) {
334 			if (lrugrow(l) < 0)
335 				return ((char *) NULL);
336 			return ((*f)((LRU) l, pgno, nread));
337 		}
338 
339 		/* okay, found a free buffer -- update hash table and list */
340 		ce = lruhashget(l, lruent->l_pgno);
341 		if (lruhashdel(l, lruent->l_pgno) < 0)
342 			return ((char *) NULL);
343 
344 		/* flush the old page to disk, if necessary */
345 		if (lruent->l_flags & F_DIRTY)
346 			if (lruflush(l, lruent) < 0)
347 				return ((char *) NULL);
348 
349 		/* update stats, hash table, and list */
350 		lruent->l_pgno = pgno;
351 		lruhead(l, lruent);
352 		ce = lruhashput(l, pgno, lruent);
353 		buffer = lruent->l_buffer;
354 	}
355 
356 	/* lock this page down */
357 	lruent->l_flags &= ~F_FREE;
358 
359 	return (buffer);
360 }
361 
362 /*
363  *  LRUFLUSH -- flush an LRU page to disk.
364  *
365  *	This routine forces a page to disk.  Users should use lruwrite(),
366  *	which simply marks a page dirty and does late writing.
367  *
368  *	Parameters:
369  *		l -- LRU cache
370  *		lruent -- the LRU cache entry whose page we should flush
371  *
372  *	Returns:
373  *		Zero on success, -1 on failure.
374  */
375 
376 lruflush(l, lruent)
377 	LRUCACHE *l;
378 	LRU_ENT *lruent;
379 {
380 	long pos;
381 	int nbytes;
382 
383 	if (l->lru_outproc)
384 		(*(l->lru_outproc))(lruent->l_buffer, l->lru_opaque);
385 
386 	pos = (long) (l->lru_psize * lruent->l_pgno);
387 	if (lseek(l->lru_fd, pos, L_SET) != pos)
388 		return (-1);
389 	if (write(l->lru_fd, lruent->l_buffer, l->lru_psize) != l->lru_psize)
390 		return (-1);
391 
392 	if (l->lru_inproc)
393 		(*(l->lru_inproc))(lruent->l_buffer, l->lru_opaque);
394 
395 	lruent->l_flags &= ~F_DIRTY;
396 	return (0);
397 }
398 
399 /*
400  *  LRUHASHGET -- Look up a block in the LRU cache by page number.
401  *
402  *	Parameters:
403  *		l -- LRU cache
404  *		pgno -- number of the logical page to get
405  *
406  *	Returns:
407  *		(CACHE_ENT *) pointer to the associated hash table entry
408  *		(if any), or NULL (if none).
409  */
410 
411 static CACHE_ENT *
412 lruhashget(l, pgno)
413 	LRUCACHE *l;
414 	int pgno;
415 {
416 	int hashind;
417 	CACHE_ENT *ce;
418 
419 	hashind = HASH(l, pgno);
420 
421 	/* walk the chain */
422 	for (ce = l->lru_cache[hashind];
423 	     ce != (CACHE_ENT *) NULL;
424 	     ce = ce->c_chain) {
425 		if (ce->c_pgno == pgno)
426 			return (ce);
427 	}
428 
429 	return ((CACHE_ENT *) NULL);
430 }
431 
432 /*
433  *  LRUHASHPUT -- Add an entry for a given page to the cache hash table.
434  *
435  *	This routine assumes that the given page does not yet have an entry
436  *	in the table.
437  *
438  *	Parameters:
439  *		l -- LRU cache
440  *		pgno -- logical page number for this entry
441  *		lruent -- LRU list entry at which hash table entry should
442  *			  point
443  *
444  *	Returns:
445  *		(CACHE_ENT *) pointer to the new hash table entry on success,
446  *		or NULL on failure.
447  *
448  *	Side Effects:
449  *		Allocates memory which should be freed when the hash table
450  *		entry is removed.
451  */
452 
453 static CACHE_ENT *
454 lruhashput(l, pgno, lruent)
455 	LRUCACHE *l;
456 	int pgno;
457 	LRU_ENT *lruent;
458 {
459 	int hashind;
460 	CACHE_ENT *ce;
461 
462 	if ((ce = (CACHE_ENT *) malloc((unsigned) sizeof(CACHE_ENT)))
463 	    == (CACHE_ENT *) NULL)
464 		return ((CACHE_ENT *) NULL);
465 
466 	hashind = HASH(l, pgno);
467 
468 	ce->c_pgno = pgno;
469 	ce->c_lruent = lruent;
470 	ce->c_chain = l->lru_cache[hashind];
471 	l->lru_cache[hashind] = ce;
472 
473 	return (ce);
474 }
475 
476 /*
477  *  LRUHASHDEL -- Delete the entry for a given page from the LRU cache
478  *		  hash table.
479  *
480  *	Parameters:
481  *		l -- LRU cache
482  *		pgno -- page number for which we should delete htable entry
483  *
484  *	Returns:
485  *		Zero on success, -1 on failure.
486  *
487  *	Side Effects:
488  *		Releases the memory occupied by the hash table entry.
489  */
490 
491 static int
492 lruhashdel(l, pgno)
493 	LRUCACHE *l;
494 	int pgno;
495 {
496 	CACHE_ENT *ce;
497 	CACHE_ENT *sce;
498 	int hashind;
499 
500 	sce = (CACHE_ENT *) NULL;
501 	hashind = HASH(l, pgno);
502 
503 	/* find the entry we want to delete */
504 	for (ce = l->lru_cache[hashind];
505 	     ce != (CACHE_ENT *) NULL;
506 	     ce = ce->c_chain) {
507 		if (ce->c_pgno == pgno)
508 			break;
509 		sce = ce;
510 	}
511 
512 	if (ce == (CACHE_ENT *) NULL)
513 		return (-1);
514 
515 	/* remove it from the chain */
516 	if (sce == (CACHE_ENT *) NULL)
517 		l->lru_cache[hashind] = ce->c_chain;
518 	else
519 		sce->c_chain = ce->c_chain;
520 
521 	/* release it */
522 	free (ce);
523 
524 	/* success */
525 	return (0);
526 }
527 
528 /*
529  *  LRUHEAD -- Move an LRU list entry to the head of the list.
530  *
531  *	The head of the list is where the most recently used item goes.
532  *
533  *	Parameters:
534  *		l -- LRU cache
535  *		lruent -- entry to move to the head of the list
536  *
537  *	Returns:
538  *		None
539  *
540  *	Side Effects:
541  *		Updates the cache's head and tail pointers as required.
542  */
543 
static void
lruhead(l, lruent)
	LRUCACHE *l;
	LRU_ENT *lruent;
{
	LRU_ENT *next;
	LRU_ENT *prev;

	/* already the most-recently-used entry -- nothing to do */
	if (l->lru_head == lruent)
		return;

	/* splice the entry in at the head; since it is not the head,
	   prev is guaranteed non-NULL below */
	next = lruent->l_next;
	prev = lruent->l_prev;
	lruent->l_prev = (LRU_ENT *) NULL;
	lruent->l_next = l->lru_head;
	l->lru_head->l_prev = lruent;
	l->lru_head = lruent;

	/* close the gap where the entry used to be */
	prev->l_next = next;
	if (next != (LRU_ENT *) NULL)
		next->l_prev = prev;

	/* if we moved the tail, its old predecessor becomes the tail */
	if (l->lru_tail == lruent)
		l->lru_tail = prev;
}
569 
570 /*
571  *  LRUWRITE -- Mark an LRU cache buffer dirty
572  *
573  *	This routine is how users should move their changes to disk.  The
574  *	cache code marks the associated buffer dirty, and flushes it to
575  *	disk if we need to reuse the buffer for some other page.
576  *
577  *	Parameters:
578  *		lru -- LRU cache
579  *		pgno -- page number to flush
580  *
581  *	Returns:
582  *		Zero on success, -1 on failure.
583  */
584 
585 int
586 lruwrite(lru, pgno)
587 	LRU lru;
588 	int pgno;
589 {
590 	LRUCACHE *l = (LRUCACHE *) lru;
591 	LRU_ENT *lruent;
592 	CACHE_ENT *ce;
593 
594 	if ((ce = lruhashget(l, pgno)) == (CACHE_ENT *) NULL)
595 		return (-1);
596 
597 	/* mark the entry dirty */
598 	ce->c_lruent->l_flags |= F_DIRTY;
599 
600 	return (0);
601 }
602 
603 /*
604  *  LRUSYNC -- Flush all changes to disk
605  *
606  *	This routine allows users to force all changes to buffers currently
607  *	in memory to disk.  This is expensive.
608  *
609  *	Parameters:
610  *		lru -- LRU cache to flush
611  *
612  *	Returns:
613  *		Zero on success, -1 on failure
614  *
615  *	Side Effects:
616  *		After this call, all buffers are clean.
617  */
618 
619 int
620 lrusync(lru)
621 	LRU lru;
622 {
623 	LRUCACHE *l = (LRUCACHE *) lru;
624 	LRU_ENT *le;
625 
626 	for (le = l->lru_head; le != (LRU_ENT *) NULL; le = le->l_next)
627 		if (le->l_flags & F_DIRTY)
628 			if (lruflush(l, le) < 0)
629 				return (-1);
630 	return (0);
631 }
632 
633 /*
634  *  LRUGROW -- Grow the LRU cache
635  *
636  *	This routine is called only if we can't satisfy a user's get() request
637  *	using an existing buffer.  We need to rebuild the hash table so that
638  *	subsequent lookups work correctly, since the cache size is used to
639  *	compute hash keys.
640  *
641  *	Parameters:
642  *		l -- LRU cache to grow
643  *
644  *	Returns:
645  *		Zero on success, -1 on failure
646  */
647 
648 static int
649 lrugrow(l)
650 	LRUCACHE *l;
651 {
652 	int oldsz, newsz;
653 	CACHE_ENT **new;
654 	CACHE_ENT *ce, *nce;
655 	int h;
656 	int i;
657 
658 	oldsz = l->lru_csize;
659 	newsz = l->lru_csize + 1;
660 
661 	/* get a new hash table */
662 	if ((new = (CACHE_ENT **) malloc((unsigned)newsz * sizeof(CACHE_ENT *)))
663 	    == (CACHE_ENT **) NULL)
664 		return (-1);
665 
666 	/* build the new hash table */
667 	bzero(new, (newsz * sizeof(CACHE_ENT *)));
668 	for (i = oldsz; --i >= 0; ) {
669 		for (ce = l->lru_cache[i]; ce != (CACHE_ENT *) NULL; ) {
670 			nce = ce->c_chain;
671 			h = ce->c_pgno % newsz;
672 			ce->c_chain = new[h];
673 			new[h] = ce;
674 			ce = nce;
675 		}
676 	}
677 
678 	/* get rid of the old hash table, and update the cache */
679 	free (l->lru_cache);
680 	l->lru_cache = new;
681 	l->lru_csize = newsz;
682 
683 	return (0);
684 }
685 
686 /*
687  *  LRURELEASE -- Release a buffer in the LRU cache for reuse
688  *
689  *	When the user does an lruget() or lrugetnew(), the buffer we return
690  *	is locked down, to guarantee that it's not reused while the user
691  *	still needs it.  Once a buffer is no longer needed, it should be
692  *	released (via this routine) so that we can use it for other pages
693  *	if necessary.
694  *
695  *	Parameters:
696  *		lru -- LRU cache
697  *		pgno -- page number of buffer to release
698  *
699  *	Returns:
700  *		Zero on success, -1 on failure
701  */
702 
703 int
704 lrurelease(lru, pgno)
705 	LRU lru;
706 	int pgno;
707 {
708 	LRUCACHE *l = (LRUCACHE *) lru;
709 	CACHE_ENT *ce;
710 
711 	if ((ce = lruhashget(l, pgno)) == (CACHE_ENT *) NULL)
712 		return (-1);
713 	ce->c_lruent->l_flags |= F_FREE;
714 	return (0);
715 }
716 
717 /*
718  *  LRUFREE -- Free an entire LRU cache
719  *
720  *	This routine releases an LRU cache.  The cache should not be
721  *	used again.
722  *
723  *	Parameters:
724  *		lru -- LRU cache to free
725  *
726  *	Returns:
727  *		None.
728  *
729  *	Side Effects:
730  *		Frees a lot of memory.
731  */
732 
733 void
734 lrufree(lru)
735 	LRU lru;
736 {
737 	LRUCACHE *l = (LRUCACHE *) lru;
738 	LRU_ENT *le;
739 	LRU_ENT *sle;
740 
741 	for (le = l->lru_head; le != (LRU_ENT *) NULL; ) {
742 		free(le->l_buffer);
743 		sle = le;
744 		le = le->l_next;
745 		free(sle);
746 	}
747 	free (l);
748 }
749