xref: /netbsd-src/sbin/fsck_lfs/bufcache.c (revision fad4c9f71477ae11cea2ee75ec82151ac770a534)
1 /* $NetBSD: bufcache.c,v 1.9 2006/04/19 15:52:58 christos Exp $ */
2 /*-
3  * Copyright (c) 2003 The NetBSD Foundation, Inc.
4  * All rights reserved.
5  *
6  * This code is derived from software contributed to The NetBSD Foundation
7  * by Konrad E. Schroder <perseant@hhhh.org>.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. All advertising materials mentioning features or use of this software
18  *    must display the following acknowledgement:
19  *	This product includes software developed by the NetBSD
20  *	Foundation, Inc. and its contributors.
21  * 4. Neither the name of The NetBSD Foundation nor the names of its
22  *    contributors may be used to endorse or promote products derived
23  *    from this software without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35  * POSSIBILITY OF SUCH DAMAGE.
36  */
37 
38 #include <sys/types.h>
39 #include <sys/param.h>
40 #include <sys/time.h>
41 #include <sys/buf.h>
42 #include <sys/queue.h>
43 #include <sys/mount.h>
44 
45 #include <assert.h>
46 #include <err.h>
47 #include <stdio.h>
48 #include <stdlib.h>
49 #include <string.h>
50 #include <unistd.h>
51 
52 #include "bufcache.h"
53 #include "vnode.h"
54 
55 /*
56  * Definitions for the buffer free lists.
57  */
58 #define	BQUEUES		3	/* number of free buffer queues */
59 
60 #define	BQ_LOCKED	0	/* super-blocks &c */
61 #define	BQ_LRU		1	/* lru, useful buffers */
62 #define	BQ_AGE		2	/* rubbish */
63 
/* One tail queue per free list; buffers are linked via b_freelist. */
64 TAILQ_HEAD(bqueues, ubuf) bufqueues[BQUEUES];
65 
/* Buffer hash table: hashmax chains of buffers linked via b_hash. */
66 struct bufhash_struct *bufhash;
67 
/* Default hash width; bufinit()/bufrehash() may pick a larger power of two. */
68 #define HASH_MAX 1024
69 int hashmax  = HASH_MAX;
70 int hashmask = (HASH_MAX - 1);
71 
/* Cache sizing and statistics (hits/misses, worst hash-chain depth). */
72 int maxbufs = BUF_CACHE_SIZE;
73 int nbufs = 0;
74 int cachehits = 0;
75 int cachemisses = 0;
76 int max_depth = 0;
/* Accounting for B_LOCKED buffers, maintained by bremfree()/brelse(). */
77 off_t locked_queue_bytes = 0;
78 int locked_queue_count = 0;
80 /* Simple buffer hash function */
81 static int
82 vl_hash(struct uvnode * vp, daddr_t lbn)
83 {
84 	return (int)((unsigned long) vp + lbn) & hashmask;
85 }
86 
87 /* Initialize buffer cache */
88 void
89 bufinit(int max)
90 {
91 	int i;
92 
93 	if (max) {
94 		for (hashmax = 1; hashmax < max; hashmax <<= 1)
95 			;
96 		hashmask = hashmax - 1;
97 	}
98 
99 	for (i = 0; i < BQUEUES; i++) {
100 		TAILQ_INIT(&bufqueues[i]);
101 	}
102 	bufhash = (struct bufhash_struct *)malloc(hashmax * sizeof(*bufhash));
103 	if (bufhash == NULL)
104 		err(1, NULL);
105 	for (i = 0; i < hashmax; i++)
106 		LIST_INIT(&bufhash[i]);
107 }
108 
109 /* Widen the hash table. */
110 void bufrehash(int max)
111 {
112 	int i, newhashmax, newhashmask;
113 	struct ubuf *bp, *nbp;
114 	struct bufhash_struct *np;
115 
116 	if (max < 0 || max < hashmax)
117 		return;
118 
119 	/* Round up to a power of two */
120 	for (newhashmax = 1; newhashmax < max; newhashmax <<= 1)
121 		;
122 	newhashmask = newhashmax - 1;
123 
124 	/* Allocate new empty hash table, if we can */
125 	np = (struct bufhash_struct *)malloc(newhashmax * sizeof(*bufhash));
126 		if (np == NULL)
127 			return;
128 	for (i = 0; i < newhashmax; i++)
129 		LIST_INIT(&np[i]);
130 
131 	/* Now reassign all existing buffers to their new hash chains. */
132 	for (i = 0; i < hashmax; i++) {
133 		bp = LIST_FIRST(&bufhash[i]);
134 		while(bp) {
135 			nbp = LIST_NEXT(bp, b_hash);
136 			LIST_REMOVE(bp, b_hash);
137 			bp->b_hashval = vl_hash(bp->b_vp, bp->b_lblkno);
138 			LIST_INSERT_HEAD(&np[bp->b_hashval], bp, b_hash);
139 			bp = nbp;
140 		}
141 	}
142 
143 	/* Switch over and clean up */
144 	free(bufhash);
145 	bufhash = np;
146 	hashmax = newhashmax;
147 	hashmask = newhashmask;
148 }
149 
150 /* Print statistics of buffer cache usage */
151 void
152 bufstats(void)
153 {
154 	printf("buffer cache: %d hits %d misses (%2.2f%%); hash width %d, depth %d\n",
155 	    cachehits, cachemisses,
156 	    (cachehits * 100.0) / (cachehits + cachemisses),
157 	    hashmax, max_depth);
158 }
159 
160 /*
161  * Remove a buffer from the cache.
162  * Caller must remove the buffer from its free list.
163  */
164 void
165 buf_destroy(struct ubuf * bp)
166 {
167 	bp->b_flags |= B_NEEDCOMMIT;
168 	LIST_REMOVE(bp, b_vnbufs);
169 	LIST_REMOVE(bp, b_hash);
170 	if (!(bp->b_flags & B_DONTFREE))
171 		free(bp->b_data);
172 	free(bp);
173 	--nbufs;
174 }
175 
176 /* Remove a buffer from its free list. */
177 void
178 bremfree(struct ubuf * bp)
179 {
180 	struct bqueues *dp = NULL;
181 
182 	/*
183 	 * We only calculate the head of the freelist when removing
184 	 * the last element of the list as that is the only time that
185 	 * it is needed (e.g. to reset the tail pointer).
186 	 *
187 	 * NB: This makes an assumption about how tailq's are implemented.
188 	 */
189 	if (bp->b_flags & B_LOCKED) {
190 		locked_queue_bytes -= bp->b_bcount;
191 		--locked_queue_count;
192 	}
193 	if (TAILQ_NEXT(bp, b_freelist) == NULL) {
194 		for (dp = bufqueues; dp < &bufqueues[BQUEUES]; dp++)
195 			if (dp->tqh_last == &bp->b_freelist.tqe_next)
196 				break;
197 		if (dp == &bufqueues[BQUEUES])
198 			errx(1, "bremfree: lost tail");
199 	}
200 	++bp->b_vp->v_usecount;
201 	TAILQ_REMOVE(dp, bp, b_freelist);
202 }
203 
204 /* Return a buffer if it is in the cache, otherwise return NULL. */
205 struct ubuf *
206 incore(struct uvnode * vp, int lbn)
207 {
208 	struct ubuf *bp;
209 	int hash, depth;
210 
211 	hash = vl_hash(vp, lbn);
212 	/* XXX use a real hash instead. */
213 	depth = 0;
214 	LIST_FOREACH(bp, &bufhash[hash], b_hash) {
215 		if (++depth > max_depth)
216 			max_depth = depth;
217 		assert(depth <= nbufs);
218 		if (bp->b_vp == vp && bp->b_lblkno == lbn) {
219 			return bp;
220 		}
221 	}
222 	return NULL;
223 }
224 
225 /*
226  * Return a buffer of the given size, lbn and uvnode.
227  * If none is in core, make a new one.
228  */
229 struct ubuf *
230 getblk(struct uvnode * vp, daddr_t lbn, int size)
231 {
232 	struct ubuf *bp;
233 #ifdef DEBUG
234 	static int warned;
235 #endif
236 
237 	/*
238 	 * First check the buffer cache lists.
239 	 * We might sometimes need to resize a buffer.  If we are growing
240 	 * the buffer, its contents are invalid; but shrinking is okay.
241 	 */
242 	if ((bp = incore(vp, lbn)) != NULL) {
243 		assert(!(bp->b_flags & B_NEEDCOMMIT));
244 		assert(!(bp->b_flags & B_BUSY));
245 		bp->b_flags |= B_BUSY;
246 		bremfree(bp);
247 		if (bp->b_bcount == size)
248 			return bp;
249 		else if (bp->b_bcount > size) {
250 			assert(!(bp->b_flags & B_DELWRI));
251 			bp->b_bcount = size;
252 			bp->b_data = realloc(bp->b_data, size);
253 			if (bp->b_data == NULL)
254 				err(1, NULL);
255 			return bp;
256 		}
257 
258 		buf_destroy(bp);
259 		bp = NULL;
260 	}
261 
262 	/*
263 	 * Not on the list.
264 	 * Get a new block of the appropriate size and use that.
265 	 * If not enough space, free blocks from the AGE and LRU lists
266 	 * to make room.
267 	 */
268 	while (nbufs >= maxbufs + locked_queue_count) {
269 		bp = TAILQ_FIRST(&bufqueues[BQ_AGE]);
270 		if (bp)
271 			TAILQ_REMOVE(&bufqueues[BQ_AGE], bp, b_freelist);
272 		if (bp == NULL) {
273 			bp = TAILQ_FIRST(&bufqueues[BQ_LRU]);
274 			if (bp)
275 				TAILQ_REMOVE(&bufqueues[BQ_LRU], bp,
276 				    b_freelist);
277 		}
278 		if (bp) {
279 			if (bp->b_flags & B_DELWRI)
280 				VOP_STRATEGY(bp);
281 			buf_destroy(bp);
282 			break;
283 		}
284 #ifdef DEBUG
285 		else if (!warned) {
286 			warnx("allocating more than %d buffers", maxbufs);
287 			++warned;
288 		}
289 #endif
290 		break;
291 	}
292 	++nbufs;
293 	bp = (struct ubuf *) malloc(sizeof(*bp));
294 	if (bp == NULL)
295 		err(1, NULL);
296 	memset(bp, 0, sizeof(*bp));
297 	bp->b_data = malloc(size);
298 	if (bp->b_data == NULL)
299 		err(1, NULL);
300 	memset(bp->b_data, 0, size);
301 
302 	bp->b_vp = vp;
303 	bp->b_blkno = bp->b_lblkno = lbn;
304 	bp->b_bcount = size;
305 	bp->b_hashval = vl_hash(vp, lbn);
306 	LIST_INSERT_HEAD(&bufhash[bp->b_hashval], bp, b_hash);
307 	LIST_INSERT_HEAD(&vp->v_cleanblkhd, bp, b_vnbufs);
308 	bp->b_flags = B_BUSY;
309 
310 	return bp;
311 }
312 
313 /* Write a buffer to disk according to its strategy routine. */
314 void
315 bwrite(struct ubuf * bp)
316 {
317 	bp->b_flags &= ~(B_READ | B_DONE | B_DELWRI | B_LOCKED);
318 	VOP_STRATEGY(bp);
319 	bp->b_flags |= B_DONE;
320 	reassignbuf(bp, bp->b_vp);
321 	brelse(bp);
322 }
323 
324 /* Put a buffer back on its free list, clear B_BUSY. */
325 void
326 brelse(struct ubuf * bp)
327 {
328 	int age;
329 
330 	assert(!(bp->b_flags & B_NEEDCOMMIT));
331 	assert(bp->b_flags & B_BUSY);
332 
333 	age = bp->b_flags & B_AGE;
334 	bp->b_flags &= ~(B_BUSY | B_AGE);
335 	if (bp->b_flags & B_INVAL) {
336 		buf_destroy(bp);
337 		return;
338 	}
339 	if (bp->b_flags & B_LOCKED) {
340 		locked_queue_bytes += bp->b_bcount;
341 		++locked_queue_count;
342 		TAILQ_INSERT_TAIL(&bufqueues[BQ_LOCKED], bp, b_freelist);
343 	} else if (age) {
344 		TAILQ_INSERT_TAIL(&bufqueues[BQ_AGE], bp, b_freelist);
345 	} else {
346 		TAILQ_INSERT_TAIL(&bufqueues[BQ_LRU], bp, b_freelist);
347 	}
348 	--bp->b_vp->v_usecount;
349 
350 	/* Move to the front of the hash chain */
351 	if (LIST_FIRST(&bufhash[bp->b_hashval]) != bp) {
352 		LIST_REMOVE(bp, b_hash);
353 		LIST_INSERT_HEAD(&bufhash[bp->b_hashval], bp, b_hash);
354 	}
355 }
356 
357 /* Read the given block from disk, return it B_BUSY. */
358 int
359 bread(struct uvnode * vp, daddr_t lbn, int size, void * unused,
360     struct ubuf ** bpp)
361 {
362 	struct ubuf *bp;
363 	daddr_t daddr;
364 	int error;
365 
366 	bp = getblk(vp, lbn, size);
367 	*bpp = bp;
368 	if (bp->b_flags & (B_DELWRI | B_DONE)) {
369 		++cachehits;
370 		return 0;
371 	}
372 	++cachemisses;
373 
374 	/*
375 	 * Not found.  Need to find that block's location on disk,
376 	 * and load it in.
377 	 */
378 	daddr = -1;
379 	error = VOP_BMAP(vp, lbn, &daddr);
380 	bp->b_blkno = daddr;
381 	if (daddr >= 0) {
382 		bp->b_flags |= B_READ;
383 		return VOP_STRATEGY(bp);
384 	}
385 	memset(bp->b_data, 0, bp->b_bcount);
386 	return 0;
387 }
388 
389 /* Move a buffer between dirty and clean block lists. */
390 void
391 reassignbuf(struct ubuf * bp, struct uvnode * vp)
392 {
393 	LIST_REMOVE(bp, b_vnbufs);
394 	if (bp->b_flags & B_DELWRI) {
395 		LIST_INSERT_HEAD(&vp->v_dirtyblkhd, bp, b_vnbufs);
396 	} else {
397 		LIST_INSERT_HEAD(&vp->v_cleanblkhd, bp, b_vnbufs);
398 	}
399 }
400 
#ifdef DEBUG
/* Debug aid: print every buffer on each of the free lists. */
void
dump_free_lists(void)
{
	struct ubuf *bp;
	int i;

	/*
	 * Walk all BQUEUES lists.  The previous bound "i <= BQ_LOCKED"
	 * visited only queue 0, since BQ_LOCKED == 0, so the LRU and
	 * AGE lists were never printed.
	 */
	for (i = 0; i < BQUEUES; i++) {
		printf("==> free list %d:\n", i);
		TAILQ_FOREACH(bp, &bufqueues[i], b_freelist) {
			printf("vp %p lbn %" PRId64 " flags %lx\n",
				bp->b_vp, bp->b_lblkno, bp->b_flags);
		}
	}
}
#endif
417