xref: /netbsd-src/sbin/dump/rcache.c (revision 27578b9aac214cc7796ead81dcc5427e79d5f2a0)
1 /*      $NetBSD: rcache.c,v 1.6 2001/05/27 14:17:57 lukem Exp $       */
2 
3 /*-
4  * Copyright (c) 1999 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Martin J. Laubach <mjl@emsi.priv.at> and
9  *    Manuel Bouyer <Manuel.Bouyer@lip6.fr>.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. All advertising materials mentioning features or use of this software
20  *    must display the following acknowledgement:
21  *      This product includes software developed by the NetBSD
22  *      Foundation, Inc. and its contributors.
23  * 4. Neither the name of The NetBSD Foundation nor the names of its
24  *    contributors may be used to endorse or promote products derived
25  *    from this software without specific prior written permission.
26  *
27  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
28  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
29  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
31  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37  * POSSIBILITY OF SUCH DAMAGE.
38  */
39 /*-----------------------------------------------------------------------*/
40 #include <sys/types.h>
41 #include <sys/uio.h>
42 #include <sys/mman.h>
43 #include <sys/param.h>
44 #include <sys/sysctl.h>
45 #include <ufs/ufs/dinode.h>
46 
47 #include <stdio.h>
48 #include <stdlib.h>
49 #include <unistd.h>
50 #include <fcntl.h>
51 #include <errno.h>
52 #include <string.h>
53 
54 #include "dump.h"
55 
56 /*-----------------------------------------------------------------------*/
/* Hard upper bound on the number of cache buffers initcache() will use. */
#define MAXCACHEBUFS	512
/*
 * When the cache is sized automatically, hw.usermem is divided by this
 * value, i.e. the cache gets at most 1/6 (roughly 16%) of user memory.
 */
#define MAXMEMPART	6
59 
60 /*-----------------------------------------------------------------------*/
/*
 * Header placed at the very start of the shared cache mapping.
 */
struct cheader {
	/*
	 * Logical clock: bread() post-increments it and stores the previous
	 * value as the access time of the descriptor it just touched.
	 */
	volatile size_t count;
};
64 
65 struct cdesc {
66 	volatile daddr_t blkstart;
67 	volatile daddr_t blkend;/* start + nblksread */
68 	volatile daddr_t blocksRead;
69 	volatile size_t time;
70 #ifdef DIAGNOSTICS
71 	volatile pid_t owner;
72 #endif
73 };
74 
/* Chooses the descriptor that bread() recycles on a cache miss. */
static int findlru(void);

/*
 * The cache lives in a single mapping created by initcache(): one
 * struct cheader, followed by cachebufs struct cdesc entries, followed by
 * the data area of the buffers.
 */
/* Base of the mapping; bread() bypasses the cache while this is NULL. */
static void *shareBuffer = NULL;
/* Header at the start of the mapping. */
static struct cheader *cheader;
/* Array of cachebufs buffer descriptors. */
static struct cdesc *cdesc;
/* Start of the buffer data area. */
static char *cdata;
/* Number of cache buffers in use. */
static int cachebufs;
/* Number of blocks held by one cache buffer. */
static int nblksread;

#ifdef STATS
/* Per-process counters reported by printcachestats(). */
static int nreads;		/* calls to bread() */
static int nphysread;		/* reads that actually went to the disk */
static int64_t readsize;	/* bytes requested through bread() */
static int64_t physreadsize;	/* bytes requested from the disk */
#endif

/* Address of the data area that belongs to cache buffer number i. */
#define CDATA(i)	(cdata + ((i) * nblksread * dev_bsize))
92 
93 void
94 initcache(int cachesize, int readblksize)
95 {
96 	size_t len;
97 	size_t  sharedSize;
98 
99 	nblksread = (readblksize + ufsib->ufs_bsize - 1) / ufsib->ufs_bsize;
100 	if(cachesize == -1) {	/* Compute from memory available */
101 		int usermem;
102 		int mib[2] = { CTL_HW, HW_USERMEM };
103 
104 		len = sizeof(usermem);
105 		if (sysctl(mib, 2, &usermem, &len, NULL, 0) < 0) {
106 			msg("sysctl(hw.usermem) failed: %s\n", strerror(errno));
107 			return;
108 		}
109 		cachebufs = (usermem / MAXMEMPART) / (nblksread * dev_bsize);
110 	} else {		/* User specified */
111 		cachebufs = cachesize;
112 	}
113 
114 	if(cachebufs) {	/* Don't allocate if zero --> no caching */
115 		if (cachebufs > MAXCACHEBUFS)
116 			cachebufs = MAXCACHEBUFS;
117 
118 		sharedSize = sizeof(struct cheader) +
119 	   	    sizeof(struct cdesc) * cachebufs +
120 	   	    nblksread * cachebufs * dev_bsize;
121 #ifdef STATS
122 		fprintf(stderr, "Using %d buffers (%d bytes)\n", cachebufs,
123 	   	    sharedSize);
124 #endif
125 		shareBuffer = mmap(NULL, sharedSize, PROT_READ | PROT_WRITE,
126 	   	    MAP_ANON | MAP_SHARED, -1, 0);
127 		if (shareBuffer == (void *)-1) {
128 			msg("can't mmap shared memory for buffer: %s\n",
129 			    strerror(errno));
130 			return;
131 		}
132 		cheader = shareBuffer;
133 		cdesc = (struct cdesc *) (((char *) shareBuffer) +
134 		    sizeof(struct cheader));
135 		cdata = ((char *) shareBuffer) + sizeof(struct cheader) +
136 	   	    sizeof(struct cdesc) * cachebufs;
137 
138 		memset(shareBuffer, '\0', sharedSize);
139 	}
140 }
141 
142 /*
143  * Find the cache buffer descriptor that shows the minimal access time
144  */
145 static int
146 findlru(void)
147 {
148 	int     i;
149 	int     minTime = cdesc[0].time;
150 	int     minIdx = 0;
151 
152 	for (i = 0; i < cachebufs; i++) {
153 		if (cdesc[i].time < minTime) {
154 			minIdx = i;
155 			minTime = cdesc[i].time;
156 		}
157 	}
158 
159 	return minIdx;
160 }
161 
162 /*
163  * Read data directly from disk, with smart error handling.
164  * Try to recover from hard errors by reading in sector sized pieces.
165  * Error recovery is attempted at most BREADEMAX times before seeking
166  * consent from the operator to continue.
167  */
168 
169 static int breaderrors = 0;
170 #define BREADEMAX 32
171 
172 void
173 rawread(daddr_t blkno, char *buf, int size)
174 {
175 	int cnt, i;
176 #ifdef STATS
177 	nphysread++;
178 	physreadsize += size;
179 #endif
180 
181 	if (lseek(diskfd, ((off_t) blkno << dev_bshift), 0) < 0) {
182 		msg("rawread: lseek fails\n");
183 		goto err;
184 	}
185 	if ((cnt =  read(diskfd, buf, size)) == size)
186 		return;
187 	if (cnt == -1)
188 		msg("read error from %s: %s: [block %d]: count=%d\n",
189 			disk, strerror(errno), blkno, size);
190 	else
191 		msg("short read error from %s: [block %d]: count=%d, got=%d\n",
192 			disk, blkno, size, cnt);
193 err:
194 	if (++breaderrors > BREADEMAX) {
195 		msg("More than %d block read errors from %s\n",
196 			BREADEMAX, disk);
197 		broadcast("DUMP IS AILING!\n");
198 		msg("This is an unrecoverable error.\n");
199 		if (!query("Do you want to attempt to continue?")){
200 			dumpabort(0);
201 			/*NOTREACHED*/
202 		} else
203 			breaderrors = 0;
204 	}
205 	/*
206 	 * Zero buffer, then try to read each sector of buffer separately.
207 	 */
208 	memset(buf, 0, size);
209 	for (i = 0; i < size; i += dev_bsize, buf += dev_bsize, blkno++) {
210 		if (lseek(diskfd, ((off_t)blkno << dev_bshift), 0) < 0) {
211 			msg("rawread: lseek2 fails: %s!\n",
212 			    strerror(errno));
213 			continue;
214 		}
215 		if ((cnt = read(diskfd, buf, (int)dev_bsize)) == dev_bsize)
216 			continue;
217 		if (cnt == -1) {
218 			msg("read error from %s: %s: [sector %d]: count=%ld: "
219 			    "%s\n", disk, strerror(errno), blkno, dev_bsize,
220 			    strerror(errno));
221 			continue;
222 		}
223 		msg("short read error from %s: [sector %d]: count=%ld, got=%d\n",
224 		    disk, blkno, dev_bsize, cnt);
225 	}
226 }
227 
228 void
229 bread(daddr_t blkno, char *buf, int size)
230 {
231 	int     osize = size;
232 	daddr_t oblkno = blkno;
233 	char   *obuf = buf;
234 	daddr_t numBlocks = (size + dev_bsize -1) / dev_bsize;
235 
236 #ifdef STATS
237 	nreads++;
238 	readsize += size;
239 #endif
240 
241 	if (!shareBuffer) {
242 		rawread(blkno, buf, size);
243 		return;
244 	}
245 
246 	if (flock(diskfd, LOCK_EX)) {
247 		msg("flock(LOCK_EX) failed: %s\n",
248 		    strerror(errno));
249 		rawread(blkno, buf, size);
250 		return;
251 	}
252 
253 
254 retry:
255 	while(size > 0) {
256 		int     i;
257 
258 		for (i = 0; i < cachebufs; i++) {
259 			struct cdesc *curr = &cdesc[i];
260 
261 #ifdef DIAGNOSTICS
262 			if (curr->owner) {
263 				fprintf(stderr, "Owner is set (%d, me=%d), can"
264 				    "not happen.\n", curr->owner, getpid());
265 			}
266 #endif
267 
268 			if (curr->blkend == 0)
269 				continue;
270 			/*
271 			 * If we find a bit of the read in the buffers,
272 			 * now compute how many blocks we can copy,
273 			 * copy them out, adjust blkno, buf and size,
274 			 * and restart
275 			 */
276 			if (curr->blkstart <= blkno &&
277 			    blkno < curr->blkend) {
278 				/* Number of data blocks to be copied */
279 				int toCopy = MIN(size,
280 				    (curr->blkend - blkno) * dev_bsize);
281 #ifdef DIAGNOSTICS
282 				if (toCopy <= 0 ||
283 				    toCopy > nblksread * dev_bsize) {
284 					fprintf(stderr, "toCopy %d !\n",
285 					    toCopy);
286 					dumpabort(0);
287 				}
288 				if (CDATA(i) + (blkno - curr->blkstart) *
289 			   	    dev_bsize < CDATA(i) ||
290 			   	    CDATA(i) + (blkno - curr->blkstart) *
291 			   	    dev_bsize >
292 				    CDATA(i) + nblksread * dev_bsize) {
293 					fprintf(stderr, "%p < %p !!!\n",
294 				   	   CDATA(i) + (blkno -
295 						curr->blkstart) * dev_bsize,
296 					   CDATA(i));
297 					fprintf(stderr, "cdesc[i].blkstart %d "
298 					    "blkno %d dev_bsize %ld\n",
299 				   	    curr->blkstart, blkno, dev_bsize);
300 					dumpabort(0);
301 				}
302 #endif
303 				memcpy(buf, CDATA(i) +
304 				    (blkno - curr->blkstart) * dev_bsize,
305 			   	    toCopy);
306 
307 				buf 	+= toCopy;
308 				size 	-= toCopy;
309 				blkno 	+= (toCopy + dev_bsize - 1) / dev_bsize;
310 				numBlocks -=
311 				    (toCopy  + dev_bsize - 1) / dev_bsize;
312 
313 				curr->time = cheader->count++;
314 
315 				/*
316 				 * If all data of a cache block have been
317 				 * read, chances are good no more reads
318 				 * will occur, so expire the cache immediately
319 				 */
320 
321 				curr->blocksRead +=
322 				    (toCopy + dev_bsize -1) / dev_bsize;
323 				if (curr->blocksRead >= nblksread)
324 					curr->time = 0;
325 
326 				goto retry;
327 			}
328 		}
329 
330 		/* No more to do? */
331 		if (size == 0)
332 			break;
333 
334 		/*
335 		 * This does actually not happen if fs blocks are not greater
336 		 * than nblksread.
337 		 */
338 		if (numBlocks > nblksread) {
339 			rawread(oblkno, obuf, osize);
340 			break;
341 		} else {
342 			int     idx;
343 			ssize_t rsize;
344 			daddr_t blockBlkNo;
345 
346 			blockBlkNo = (blkno / nblksread) * nblksread;
347 			idx = findlru();
348 			rsize = MIN(nblksread,
349 			    ufsib->ufs_dsize - blockBlkNo) *
350 			    dev_bsize;
351 
352 #ifdef DIAGNOSTICS
353 			if (cdesc[idx].owner)
354 				fprintf(stderr, "Owner is set (%d, me=%d), can"
355 				    "not happen(2).\n", cdesc[idx].owner,
356 				    getpid());
357 			cdesc[idx].owner = getpid();
358 #endif
359 			cdesc[idx].time = cheader->count++;
360 			cdesc[idx].blkstart = blockBlkNo;
361 			cdesc[idx].blocksRead = 0;
362 
363 			if (lseek(diskfd,
364 			    ((off_t) (blockBlkNo) << dev_bshift), 0) < 0) {
365 				msg("readBlocks: lseek fails: %s\n",
366 				    strerror(errno));
367 				rsize = -1;
368 			} else {
369 				rsize = read(diskfd, CDATA(idx), rsize);
370 				if (rsize < 0) {
371 					msg("readBlocks: read fails: %s\n",
372 					    strerror(errno));
373 				}
374 			}
375 
376 			/* On errors, panic, punt, try to read without
377 			 * cache and let raw read routine do the rest.
378 			 */
379 
380 			if (rsize <= 0) {
381 				rawread(oblkno, obuf, osize);
382 #ifdef DIAGNOSTICS
383 				if (cdesc[idx].owner != getpid())
384 					fprintf(stderr, "Owner changed from "
385 					    "%d to %d, can't happen\n",
386 					    getpid(), cdesc[idx].owner);
387 				cdesc[idx].owner = 0;
388 #endif
389 				break;
390 			}
391 
392 			/* On short read, just note the fact and go on */
393 			cdesc[idx].blkend = blockBlkNo + rsize / dev_bsize;
394 
395 #ifdef STATS
396 			nphysread++;
397 			physreadsize += rsize;
398 #endif
399 #ifdef DIAGNOSTICS
400 			if (cdesc[idx].owner != getpid())
401 				fprintf(stderr, "Owner changed from "
402 				    "%d to %d, can't happen\n",
403 				    getpid(), cdesc[idx].owner);
404 			cdesc[idx].owner = 0;
405 #endif
406 			/*
407 			 * We swapped some of data in, let the loop fetch
408 			 * them from cache
409 			 */
410 		}
411 	}
412 
413 	if (flock(diskfd, LOCK_UN))
414 		msg("flock(LOCK_UN) failed: %s\n",
415 		    strerror(errno));
416 	return;
417 }
418 
/*
 * Print this process's cache statistics to stderr.
 *
 * Does nothing unless the file is compiled with STATS.  The hit rate is
 * the share of bread() calls that needed no physical read; the overhead is
 * the extra amount of data physically read, relative to the amount that
 * was asked for.  Both are reported as 0 when nothing has been read yet.
 */
void
printcachestats(void)
{
#ifdef STATS
	int hits = 0;
	int overhead = 0;

	/* Guard both ratios against a process that never called bread(). */
	if (nreads != 0)
		hits = (int)(((long long)nreads - nphysread) * 100 / nreads);
	if (readsize != 0)
		overhead = (int)(((physreadsize - readsize) * 100) / readsize);

	/* The byte counters are 64 bits wide; print them without truncation. */
	fprintf(stderr, "Pid %d: %d reads (%lld bytes) "
	    "%d physical reads (%lld bytes) %d%% hits, %d%% overhead\n",
	    (int)getpid(), nreads, (long long)readsize, nphysread,
	    (long long)physreadsize, hits, overhead);
#endif
}
430